Wonder-Griffin
committed on
Training in progress, step 5
Browse files
- autotrain-data/dataset_dict.json +1 -0
- autotrain-data/train/data-00000-of-00006.arrow +3 -0
- autotrain-data/train/data-00001-of-00006.arrow +3 -0
- autotrain-data/train/data-00002-of-00006.arrow +3 -0
- autotrain-data/train/data-00003-of-00006.arrow +3 -0
- autotrain-data/train/data-00004-of-00006.arrow +3 -0
- autotrain-data/train/data-00005-of-00006.arrow +3 -0
- autotrain-data/train/dataset_info.json +12 -0
- autotrain-data/train/state.json +28 -0
- autotrain-data/validation/data-00000-of-00006.arrow +3 -0
- autotrain-data/validation/data-00001-of-00006.arrow +3 -0
- autotrain-data/validation/data-00002-of-00006.arrow +3 -0
- autotrain-data/validation/data-00003-of-00006.arrow +3 -0
- autotrain-data/validation/data-00004-of-00006.arrow +3 -0
- autotrain-data/validation/data-00005-of-00006.arrow +3 -0
- autotrain-data/validation/dataset_info.json +12 -0
- autotrain-data/validation/state.json +28 -0
- config.json +37 -0
- dataset_dict.json +1 -0
- model.safetensors +3 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +55 -0
- train/data-00000-of-00006.arrow +3 -0
- train/data-00001-of-00006.arrow +3 -0
- train/data-00002-of-00006.arrow +3 -0
- train/data-00003-of-00006.arrow +3 -0
- train/data-00004-of-00006.arrow +3 -0
- train/data-00005-of-00006.arrow +3 -0
- train/dataset_info.json +12 -0
- train/state.json +28 -0
- training_args.bin +3 -0
- training_params (2).json +49 -0
- training_params.json +49 -0
- validation/data-00000-of-00006.arrow +3 -0
- validation/data-00001-of-00006.arrow +3 -0
- validation/data-00002-of-00006.arrow +3 -0
- validation/data-00003-of-00006.arrow +3 -0
- validation/data-00004-of-00006.arrow +3 -0
- validation/data-00005-of-00006.arrow +3 -0
- validation/dataset_info.json +12 -0
- validation/state.json +28 -0
- vocab.txt +0 -0
autotrain-data/dataset_dict.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"splits": ["train", "validation"]}
|
autotrain-data/train/data-00000-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fcff7033322edc5457a872e7102d5b73008886f82298fd3aa8500e31f0e1898
|
3 |
+
size 612540528
|
autotrain-data/train/data-00001-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80d97f6a51a5deffb0ddf7c0df4b6305c829b90728e941d263fab64ad33c82db
|
3 |
+
size 582202992
|
autotrain-data/train/data-00002-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:962772a8f12b6f501e35d580958aa39e405034839e107981c2ff0c5af4852a6c
|
3 |
+
size 469873904
|
autotrain-data/train/data-00003-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22969288efa339ddb93f3b0c96cfa6cd495c71696c4468e4c9adfd13bf7228dd
|
3 |
+
size 141271728
|
autotrain-data/train/data-00004-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27a151ebc8fb3be6e3969d48d651bfeb648146975ef5be3814f64efe51e6691f
|
3 |
+
size 491365808
|
autotrain-data/train/data-00005-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e416d92cff19f2e316a49d2bf75471f787dfdcf08d35a1bec581d0dbbdebded3
|
3 |
+
size 220452656
|
autotrain-data/train/dataset_info.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"autotrain_text": {
|
6 |
+
"dtype": "string",
|
7 |
+
"_type": "Value"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"homepage": "",
|
11 |
+
"license": ""
|
12 |
+
}
|
autotrain-data/train/state.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00006.arrow"
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"filename": "data-00001-of-00006.arrow"
|
8 |
+
},
|
9 |
+
{
|
10 |
+
"filename": "data-00002-of-00006.arrow"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"filename": "data-00003-of-00006.arrow"
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"filename": "data-00004-of-00006.arrow"
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"filename": "data-00005-of-00006.arrow"
|
20 |
+
}
|
21 |
+
],
|
22 |
+
"_fingerprint": "92455bc5ac963ada",
|
23 |
+
"_format_columns": null,
|
24 |
+
"_format_kwargs": {},
|
25 |
+
"_format_type": null,
|
26 |
+
"_output_all_columns": false,
|
27 |
+
"_split": null
|
28 |
+
}
|
autotrain-data/validation/data-00000-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fcff7033322edc5457a872e7102d5b73008886f82298fd3aa8500e31f0e1898
|
3 |
+
size 612540528
|
autotrain-data/validation/data-00001-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80d97f6a51a5deffb0ddf7c0df4b6305c829b90728e941d263fab64ad33c82db
|
3 |
+
size 582202992
|
autotrain-data/validation/data-00002-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:962772a8f12b6f501e35d580958aa39e405034839e107981c2ff0c5af4852a6c
|
3 |
+
size 469873904
|
autotrain-data/validation/data-00003-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22969288efa339ddb93f3b0c96cfa6cd495c71696c4468e4c9adfd13bf7228dd
|
3 |
+
size 141271728
|
autotrain-data/validation/data-00004-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27a151ebc8fb3be6e3969d48d651bfeb648146975ef5be3814f64efe51e6691f
|
3 |
+
size 491365808
|
autotrain-data/validation/data-00005-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e416d92cff19f2e316a49d2bf75471f787dfdcf08d35a1bec581d0dbbdebded3
|
3 |
+
size 220452656
|
autotrain-data/validation/dataset_info.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"autotrain_text": {
|
6 |
+
"dtype": "string",
|
7 |
+
"_type": "Value"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"homepage": "",
|
11 |
+
"license": ""
|
12 |
+
}
|
autotrain-data/validation/state.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00006.arrow"
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"filename": "data-00001-of-00006.arrow"
|
8 |
+
},
|
9 |
+
{
|
10 |
+
"filename": "data-00002-of-00006.arrow"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"filename": "data-00003-of-00006.arrow"
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"filename": "data-00004-of-00006.arrow"
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"filename": "data-00005-of-00006.arrow"
|
20 |
+
}
|
21 |
+
],
|
22 |
+
"_fingerprint": "92455bc5ac963ada",
|
23 |
+
"_format_columns": null,
|
24 |
+
"_format_kwargs": {},
|
25 |
+
"_format_type": null,
|
26 |
+
"_output_all_columns": false,
|
27 |
+
"_split": null
|
28 |
+
}
|
config.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Wonder-Griffin/TraXLMistral",
|
3 |
+
"architectures": [
|
4 |
+
"TraXLMistralForCausalLM"
|
5 |
+
],
|
6 |
+
"dropout": 0.1,
|
7 |
+
"dynamic_routing": true,
|
8 |
+
"ff_expansion_factor": 4,
|
9 |
+
"hidden_size": 768,
|
10 |
+
"id2label": {
|
11 |
+
"0": "LABEL_0",
|
12 |
+
"1": "LABEL_1",
|
13 |
+
"2": "LABEL_2",
|
14 |
+
"3": "LABEL_3",
|
15 |
+
"4": "LABEL_4"
|
16 |
+
},
|
17 |
+
"is_decoder": true,
|
18 |
+
"label2id": {
|
19 |
+
"LABEL_0": 0,
|
20 |
+
"LABEL_1": 1,
|
21 |
+
"LABEL_2": 2,
|
22 |
+
"LABEL_3": 3,
|
23 |
+
"LABEL_4": 4
|
24 |
+
},
|
25 |
+
"max_computation_steps": 5,
|
26 |
+
"max_len": 256,
|
27 |
+
"memory_size": 256,
|
28 |
+
"model_type": "TraXLMistral",
|
29 |
+
"n_embd": 128,
|
30 |
+
"n_head": 4,
|
31 |
+
"n_layer": 4,
|
32 |
+
"rnn_units": 128,
|
33 |
+
"sparse_attention": true,
|
34 |
+
"torch_dtype": "float32",
|
35 |
+
"transformers_version": "4.44.2",
|
36 |
+
"vocab_size": 50257
|
37 |
+
}
|
dataset_dict.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"splits": ["train", "validation"]}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c651e8f83effcdecd7724aba603a9b4c48743ec9ab6ee60fde6cd4e93e08178e
|
3 |
+
size 194522976
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_lower_case": true,
|
47 |
+
"mask_token": "[MASK]",
|
48 |
+
"model_max_length": 512,
|
49 |
+
"pad_token": "[PAD]",
|
50 |
+
"sep_token": "[SEP]",
|
51 |
+
"strip_accents": null,
|
52 |
+
"tokenize_chinese_chars": true,
|
53 |
+
"tokenizer_class": "BertTokenizer",
|
54 |
+
"unk_token": "[UNK]"
|
55 |
+
}
|
train/data-00000-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fcff7033322edc5457a872e7102d5b73008886f82298fd3aa8500e31f0e1898
|
3 |
+
size 612540528
|
train/data-00001-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80d97f6a51a5deffb0ddf7c0df4b6305c829b90728e941d263fab64ad33c82db
|
3 |
+
size 582202992
|
train/data-00002-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:962772a8f12b6f501e35d580958aa39e405034839e107981c2ff0c5af4852a6c
|
3 |
+
size 469873904
|
train/data-00003-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22969288efa339ddb93f3b0c96cfa6cd495c71696c4468e4c9adfd13bf7228dd
|
3 |
+
size 141271728
|
train/data-00004-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27a151ebc8fb3be6e3969d48d651bfeb648146975ef5be3814f64efe51e6691f
|
3 |
+
size 491365808
|
train/data-00005-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e416d92cff19f2e316a49d2bf75471f787dfdcf08d35a1bec581d0dbbdebded3
|
3 |
+
size 220452656
|
train/dataset_info.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"autotrain_text": {
|
6 |
+
"dtype": "string",
|
7 |
+
"_type": "Value"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"homepage": "",
|
11 |
+
"license": ""
|
12 |
+
}
|
train/state.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00006.arrow"
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"filename": "data-00001-of-00006.arrow"
|
8 |
+
},
|
9 |
+
{
|
10 |
+
"filename": "data-00002-of-00006.arrow"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"filename": "data-00003-of-00006.arrow"
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"filename": "data-00004-of-00006.arrow"
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"filename": "data-00005-of-00006.arrow"
|
20 |
+
}
|
21 |
+
],
|
22 |
+
"_fingerprint": "92455bc5ac963ada",
|
23 |
+
"_format_columns": null,
|
24 |
+
"_format_kwargs": {},
|
25 |
+
"_format_type": null,
|
26 |
+
"_output_all_columns": false,
|
27 |
+
"_split": null
|
28 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:458b0eb6720a39b1bb5d10fe01f32483acef761bfe7604e11cca92e3e2cf83d6
|
3 |
+
size 5176
|
training_params (2).json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "Wonder-Griffin/TraXLMistral",
|
3 |
+
"project_name": "TrainTraXLLMistral-1",
|
4 |
+
"data_path": "TrainTraXLLMistral-1/autotrain-data",
|
5 |
+
"train_split": "train",
|
6 |
+
"valid_split": null,
|
7 |
+
"add_eos_token": true,
|
8 |
+
"block_size": 512,
|
9 |
+
"model_max_length": 512,
|
10 |
+
"padding": "right",
|
11 |
+
"trainer": "default",
|
12 |
+
"use_flash_attention_2": true,
|
13 |
+
"log": "tensorboard",
|
14 |
+
"disable_gradient_checkpointing": false,
|
15 |
+
"logging_steps": -1,
|
16 |
+
"eval_strategy": "steps",
|
17 |
+
"save_total_limit": 1,
|
18 |
+
"auto_find_batch_size": false,
|
19 |
+
"mixed_precision": "fp16",
|
20 |
+
"lr": 3.00003,
|
21 |
+
"epochs": 3,
|
22 |
+
"batch_size": 2,
|
23 |
+
"warmup_ratio": 0.1,
|
24 |
+
"gradient_accumulation": 4,
|
25 |
+
"optimizer": "adamw_torch",
|
26 |
+
"scheduler": "linear",
|
27 |
+
"weight_decay": 0.0,
|
28 |
+
"max_grad_norm": 1.0,
|
29 |
+
"seed": 42,
|
30 |
+
"chat_template": "none",
|
31 |
+
"quantization": null,
|
32 |
+
"target_modules": "all-linear",
|
33 |
+
"merge_adapter": true,
|
34 |
+
"peft": true,
|
35 |
+
"lora_r": 16,
|
36 |
+
"lora_alpha": 32,
|
37 |
+
"lora_dropout": 0.05,
|
38 |
+
"model_ref": null,
|
39 |
+
"dpo_beta": 0.1,
|
40 |
+
"max_prompt_length": 128,
|
41 |
+
"max_completion_length": null,
|
42 |
+
"prompt_text_column": "autotrain_prompt",
|
43 |
+
"text_column": "autotrain_text",
|
44 |
+
"rejected_text_column": "autotrain_rejected_text",
|
45 |
+
"push_to_hub": true,
|
46 |
+
"username": "Wonder-Griffin",
|
47 |
+
"token": "<REDACTED_HF_TOKEN>",
|
48 |
+
"unsloth": false
|
49 |
+
}
|
training_params.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"model": "Wonder-Griffin/TraXLMistral",
|
3 |
+
"project_name": "trainingTraXLLMistral-2",
|
4 |
+
"data_path": "trainingTraXLLMistral-2/autotrain-data",
|
5 |
+
"train_split": "train",
|
6 |
+
"valid_split": null,
|
7 |
+
"add_eos_token": true,
|
8 |
+
"block_size": 512,
|
9 |
+
"model_max_length": 512,
|
10 |
+
"padding": "right",
|
11 |
+
"trainer": "sft",
|
12 |
+
"use_flash_attention_2": false,
|
13 |
+
"log": "tensorboard",
|
14 |
+
"disable_gradient_checkpointing": false,
|
15 |
+
"logging_steps": -1,
|
16 |
+
"eval_strategy": "epoch",
|
17 |
+
"save_total_limit": 1,
|
18 |
+
"auto_find_batch_size": false,
|
19 |
+
"mixed_precision": "fp16",
|
20 |
+
"lr": 3.00003,
|
21 |
+
"epochs": 3,
|
22 |
+
"batch_size": 2,
|
23 |
+
"warmup_ratio": 0.1,
|
24 |
+
"gradient_accumulation": 4,
|
25 |
+
"optimizer": "adamw_torch",
|
26 |
+
"scheduler": "linear",
|
27 |
+
"weight_decay": 0.0,
|
28 |
+
"max_grad_norm": 1.0,
|
29 |
+
"seed": 42,
|
30 |
+
"chat_template": "none",
|
31 |
+
"quantization": "int4",
|
32 |
+
"target_modules": "all-linear",
|
33 |
+
"merge_adapter": false,
|
34 |
+
"peft": true,
|
35 |
+
"lora_r": 16,
|
36 |
+
"lora_alpha": 32,
|
37 |
+
"lora_dropout": 0.05,
|
38 |
+
"model_ref": null,
|
39 |
+
"dpo_beta": 0.1,
|
40 |
+
"max_prompt_length": 128,
|
41 |
+
"max_completion_length": null,
|
42 |
+
"prompt_text_column": "autotrain_prompt",
|
43 |
+
"text_column": "autotrain_text",
|
44 |
+
"rejected_text_column": "autotrain_rejected_text",
|
45 |
+
"push_to_hub": true,
|
46 |
+
"username": "Wonder-Griffin",
|
47 |
+
"token": "<REDACTED_HF_TOKEN>",
|
48 |
+
"unsloth": false
|
49 |
+
}
|
validation/data-00000-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fcff7033322edc5457a872e7102d5b73008886f82298fd3aa8500e31f0e1898
|
3 |
+
size 612540528
|
validation/data-00001-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80d97f6a51a5deffb0ddf7c0df4b6305c829b90728e941d263fab64ad33c82db
|
3 |
+
size 582202992
|
validation/data-00002-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:962772a8f12b6f501e35d580958aa39e405034839e107981c2ff0c5af4852a6c
|
3 |
+
size 469873904
|
validation/data-00003-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22969288efa339ddb93f3b0c96cfa6cd495c71696c4468e4c9adfd13bf7228dd
|
3 |
+
size 141271728
|
validation/data-00004-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27a151ebc8fb3be6e3969d48d651bfeb648146975ef5be3814f64efe51e6691f
|
3 |
+
size 491365808
|
validation/data-00005-of-00006.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e416d92cff19f2e316a49d2bf75471f787dfdcf08d35a1bec581d0dbbdebded3
|
3 |
+
size 220452656
|
validation/dataset_info.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"autotrain_text": {
|
6 |
+
"dtype": "string",
|
7 |
+
"_type": "Value"
|
8 |
+
}
|
9 |
+
},
|
10 |
+
"homepage": "",
|
11 |
+
"license": ""
|
12 |
+
}
|
validation/state.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00006.arrow"
|
5 |
+
},
|
6 |
+
{
|
7 |
+
"filename": "data-00001-of-00006.arrow"
|
8 |
+
},
|
9 |
+
{
|
10 |
+
"filename": "data-00002-of-00006.arrow"
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"filename": "data-00003-of-00006.arrow"
|
14 |
+
},
|
15 |
+
{
|
16 |
+
"filename": "data-00004-of-00006.arrow"
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"filename": "data-00005-of-00006.arrow"
|
20 |
+
}
|
21 |
+
],
|
22 |
+
"_fingerprint": "92455bc5ac963ada",
|
23 |
+
"_format_columns": null,
|
24 |
+
"_format_kwargs": {},
|
25 |
+
"_format_type": null,
|
26 |
+
"_output_all_columns": false,
|
27 |
+
"_split": null
|
28 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|