TimoImhof commited on
Commit
f4a3fdc
1 Parent(s): 61543cd

Push local model to hub

Browse files
Files changed (47) hide show
  1. checkpoint-10969/config.json +27 -0
  2. checkpoint-10969/merges.txt +0 -0
  3. checkpoint-10969/optimizer.pt +3 -0
  4. checkpoint-10969/pytorch_model.bin +3 -0
  5. checkpoint-10969/rng_state.pth +3 -0
  6. checkpoint-10969/scaler.pt +3 -0
  7. checkpoint-10969/scheduler.pt +3 -0
  8. checkpoint-10969/special_tokens_map.json +15 -0
  9. checkpoint-10969/tokenizer.json +0 -0
  10. checkpoint-10969/tokenizer_config.json +16 -0
  11. checkpoint-10969/trainer_state.json +142 -0
  12. checkpoint-10969/training_args.bin +3 -0
  13. checkpoint-10969/vocab.json +0 -0
  14. checkpoint-21938/config.json +27 -0
  15. checkpoint-21938/merges.txt +0 -0
  16. checkpoint-21938/optimizer.pt +3 -0
  17. checkpoint-21938/pytorch_model.bin +3 -0
  18. checkpoint-21938/rng_state.pth +3 -0
  19. checkpoint-21938/scaler.pt +3 -0
  20. checkpoint-21938/scheduler.pt +3 -0
  21. checkpoint-21938/special_tokens_map.json +15 -0
  22. checkpoint-21938/tokenizer.json +0 -0
  23. checkpoint-21938/tokenizer_config.json +16 -0
  24. checkpoint-21938/trainer_state.json +274 -0
  25. checkpoint-21938/training_args.bin +3 -0
  26. checkpoint-21938/vocab.json +0 -0
  27. checkpoint-32907/config.json +27 -0
  28. checkpoint-32907/merges.txt +0 -0
  29. checkpoint-32907/optimizer.pt +3 -0
  30. checkpoint-32907/pytorch_model.bin +3 -0
  31. checkpoint-32907/rng_state.pth +3 -0
  32. checkpoint-32907/scaler.pt +3 -0
  33. checkpoint-32907/scheduler.pt +3 -0
  34. checkpoint-32907/special_tokens_map.json +15 -0
  35. checkpoint-32907/tokenizer.json +0 -0
  36. checkpoint-32907/tokenizer_config.json +16 -0
  37. checkpoint-32907/trainer_state.json +406 -0
  38. checkpoint-32907/training_args.bin +3 -0
  39. checkpoint-32907/vocab.json +0 -0
  40. merges.txt +0 -0
  41. runs/Feb16_14-41-22_thanos/1676554888.3852084/events.out.tfevents.1676554888.thanos.148842.1 +3 -0
  42. runs/Feb16_14-41-22_thanos/events.out.tfevents.1676554888.thanos.148842.0 +3 -0
  43. special_tokens_map.json +15 -0
  44. tokenizer.json +0 -0
  45. tokenizer_config.json +16 -0
  46. training_args.bin +3 -0
  47. vocab.json +0 -0
checkpoint-10969/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
checkpoint-10969/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-10969/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ce4f9cb9788165f2334fe97cfa1be2ab452fddd13de52810f3f6476bc9d4968
3
+ size 992569349
checkpoint-10969/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e698cf150c2efa175a8bf4ed882eec9f98733cb96337a0ac1445a220e384fd
3
+ size 496299053
checkpoint-10969/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d77dc84a7f1e401d5b399cb4903628adda9472a3167112c5512530ff0515728e
3
+ size 14575
checkpoint-10969/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b910046851d4303dde201ba78c42a1ec2c08096ad3ffd9c91c88de668b1094
3
+ size 557
checkpoint-10969/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f774c7a7dea02e608be7b75834d14f3c78cb0d1df29c4265f7fa271ecee28ac2
3
+ size 627
checkpoint-10969/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
checkpoint-10969/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-10969/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "errors": "replace",
7
+ "mask_token": "<mask>",
8
+ "model_max_length": 512,
9
+ "name_or_path": "roberta-base",
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "special_tokens_map_file": null,
13
+ "tokenizer_class": "RobertaTokenizer",
14
+ "trim_offsets": true,
15
+ "unk_token": "<unk>"
16
+ }
checkpoint-10969/trainer_state.json ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 10969,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 1.9699152156076217e-05,
13
+ "loss": 2.2617,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 1.939526544504209e-05,
19
+ "loss": 1.242,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "learning_rate": 1.9091378734007962e-05,
25
+ "loss": 1.1706,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.18,
30
+ "learning_rate": 1.8787492022973838e-05,
31
+ "loss": 1.123,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.23,
36
+ "learning_rate": 1.8484820858783847e-05,
37
+ "loss": 1.1132,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.27,
42
+ "learning_rate": 1.818093414774972e-05,
43
+ "loss": 1.0418,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.32,
48
+ "learning_rate": 1.7877655210137663e-05,
49
+ "loss": 1.0142,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.36,
54
+ "learning_rate": 1.7573768499103536e-05,
55
+ "loss": 1.0665,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.41,
60
+ "learning_rate": 1.726988178806941e-05,
61
+ "loss": 0.9765,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.46,
66
+ "learning_rate": 1.696599507703528e-05,
67
+ "loss": 0.9874,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.5,
72
+ "learning_rate": 1.6662108366001154e-05,
73
+ "loss": 0.9705,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.55,
78
+ "learning_rate": 1.635822165496703e-05,
79
+ "loss": 0.9824,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.59,
84
+ "learning_rate": 1.6054334943932903e-05,
85
+ "loss": 0.939,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.64,
90
+ "learning_rate": 1.5750448232898775e-05,
91
+ "loss": 0.9501,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.68,
96
+ "learning_rate": 1.5446561521864648e-05,
97
+ "loss": 0.9427,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.73,
102
+ "learning_rate": 1.5142674810830524e-05,
103
+ "loss": 0.9215,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.77,
108
+ "learning_rate": 1.4838788099796397e-05,
109
+ "loss": 0.9106,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.82,
114
+ "learning_rate": 1.4534901388762271e-05,
115
+ "loss": 0.9276,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.87,
120
+ "learning_rate": 1.4231014677728144e-05,
121
+ "loss": 0.9249,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.91,
126
+ "learning_rate": 1.3927127966694018e-05,
127
+ "loss": 0.9114,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.96,
132
+ "learning_rate": 1.3623849029081959e-05,
133
+ "loss": 0.888,
134
+ "step": 10500
135
+ }
136
+ ],
137
+ "max_steps": 32907,
138
+ "num_train_epochs": 3,
139
+ "total_flos": 2.292748391980032e+16,
140
+ "trial_name": null,
141
+ "trial_params": null
142
+ }
checkpoint-10969/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80c75984e8a107d02a5ef6eed2c6426f6ae1bd4b878b4dfdbd06300d655871a
3
+ size 3643
checkpoint-10969/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-21938/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
checkpoint-21938/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-21938/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e62bfb3062c9b98c64eecb85b24ec92eb01bcbba1215bb1b17c6926d143b0a39
3
+ size 992569349
checkpoint-21938/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbd1aea24ad68b8933c6223a1a22fbeb25994a3f8372cc4c7c98297c9ee5150e
3
+ size 496299053
checkpoint-21938/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f2c0fed0f1b2c596bc2074911df902c8326c1194c70209de13e4088a37140b
3
+ size 14575
checkpoint-21938/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0dcad6b5531fe06de4e161813304b36fa7c0078ae25b1b2a66cf44f5471ab87
3
+ size 557
checkpoint-21938/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afeb35c0d8f854a6cea6bad0d1b8413a4d741ca6ad22447a47d479c3eaf65657
3
+ size 627
checkpoint-21938/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
checkpoint-21938/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-21938/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "errors": "replace",
7
+ "mask_token": "<mask>",
8
+ "model_max_length": 512,
9
+ "name_or_path": "roberta-base",
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "special_tokens_map_file": null,
13
+ "tokenizer_class": "RobertaTokenizer",
14
+ "trim_offsets": true,
15
+ "unk_token": "<unk>"
16
+ }
checkpoint-21938/trainer_state.json ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "global_step": 21938,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 1.9699152156076217e-05,
13
+ "loss": 2.2617,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 1.939526544504209e-05,
19
+ "loss": 1.242,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "learning_rate": 1.9091378734007962e-05,
25
+ "loss": 1.1706,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.18,
30
+ "learning_rate": 1.8787492022973838e-05,
31
+ "loss": 1.123,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.23,
36
+ "learning_rate": 1.8484820858783847e-05,
37
+ "loss": 1.1132,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.27,
42
+ "learning_rate": 1.818093414774972e-05,
43
+ "loss": 1.0418,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.32,
48
+ "learning_rate": 1.7877655210137663e-05,
49
+ "loss": 1.0142,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.36,
54
+ "learning_rate": 1.7573768499103536e-05,
55
+ "loss": 1.0665,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.41,
60
+ "learning_rate": 1.726988178806941e-05,
61
+ "loss": 0.9765,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.46,
66
+ "learning_rate": 1.696599507703528e-05,
67
+ "loss": 0.9874,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.5,
72
+ "learning_rate": 1.6662108366001154e-05,
73
+ "loss": 0.9705,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.55,
78
+ "learning_rate": 1.635822165496703e-05,
79
+ "loss": 0.9824,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.59,
84
+ "learning_rate": 1.6054334943932903e-05,
85
+ "loss": 0.939,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.64,
90
+ "learning_rate": 1.5750448232898775e-05,
91
+ "loss": 0.9501,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.68,
96
+ "learning_rate": 1.5446561521864648e-05,
97
+ "loss": 0.9427,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.73,
102
+ "learning_rate": 1.5142674810830524e-05,
103
+ "loss": 0.9215,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.77,
108
+ "learning_rate": 1.4838788099796397e-05,
109
+ "loss": 0.9106,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.82,
114
+ "learning_rate": 1.4534901388762271e-05,
115
+ "loss": 0.9276,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.87,
120
+ "learning_rate": 1.4231014677728144e-05,
121
+ "loss": 0.9249,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.91,
126
+ "learning_rate": 1.3927127966694018e-05,
127
+ "loss": 0.9114,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.96,
132
+ "learning_rate": 1.3623849029081959e-05,
133
+ "loss": 0.888,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 1.0,
138
+ "learning_rate": 1.3319962318047831e-05,
139
+ "loss": 0.8897,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.05,
144
+ "learning_rate": 1.3016075607013707e-05,
145
+ "loss": 0.765,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 1.09,
150
+ "learning_rate": 1.271218889597958e-05,
151
+ "loss": 0.7409,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 1.14,
156
+ "learning_rate": 1.2408302184945453e-05,
157
+ "loss": 0.7642,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 1.19,
162
+ "learning_rate": 1.2105023247333397e-05,
163
+ "loss": 0.7327,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 1.23,
168
+ "learning_rate": 1.180113653629927e-05,
169
+ "loss": 0.7358,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 1.28,
174
+ "learning_rate": 1.1497249825265142e-05,
175
+ "loss": 0.7657,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 1.32,
180
+ "learning_rate": 1.1193363114231015e-05,
181
+ "loss": 0.7393,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 1.37,
186
+ "learning_rate": 1.0889476403196887e-05,
187
+ "loss": 0.7338,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 1.41,
192
+ "learning_rate": 1.0586197465584831e-05,
193
+ "loss": 0.7693,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 1.46,
198
+ "learning_rate": 1.0282310754550704e-05,
199
+ "loss": 0.7357,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 1.5,
204
+ "learning_rate": 9.978424043516578e-06,
205
+ "loss": 0.7303,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 1.55,
210
+ "learning_rate": 9.674537332482451e-06,
211
+ "loss": 0.7295,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 1.6,
216
+ "learning_rate": 9.371258394870393e-06,
217
+ "loss": 0.7393,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 1.64,
222
+ "learning_rate": 9.067371683836268e-06,
223
+ "loss": 0.7345,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 1.69,
228
+ "learning_rate": 8.76348497280214e-06,
229
+ "loss": 0.7507,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 1.73,
234
+ "learning_rate": 8.459598261768015e-06,
235
+ "loss": 0.7195,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 1.78,
240
+ "learning_rate": 8.155711550733887e-06,
241
+ "loss": 0.7368,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 1.82,
246
+ "learning_rate": 7.852432613121828e-06,
247
+ "loss": 0.7317,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 1.87,
252
+ "learning_rate": 7.548545902087703e-06,
253
+ "loss": 0.7191,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 1.91,
258
+ "learning_rate": 7.244659191053576e-06,
259
+ "loss": 0.7102,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 1.96,
264
+ "learning_rate": 6.94077248001945e-06,
265
+ "loss": 0.7145,
266
+ "step": 21500
267
+ }
268
+ ],
269
+ "max_steps": 32907,
270
+ "num_train_epochs": 3,
271
+ "total_flos": 4.585496783960064e+16,
272
+ "trial_name": null,
273
+ "trial_params": null
274
+ }
checkpoint-21938/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80c75984e8a107d02a5ef6eed2c6426f6ae1bd4b878b4dfdbd06300d655871a
3
+ size 3643
checkpoint-21938/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-32907/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "roberta-base",
3
+ "architectures": [
4
+ "RobertaForQuestionAnswering"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.1",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
checkpoint-32907/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-32907/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580a90b25b8bb0b375ae95b0412d940a00898b93ec12e7b3811f47937c47f0d7
3
+ size 992569349
checkpoint-32907/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40309e62b19ac117150129100e3a969aef69603b619e100fe522fba0e35811d1
3
+ size 496299053
checkpoint-32907/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b6db3e133ca595d3307be164fdcea0f8c1f8df7c82b076c3f97c8ce0f21c8a3
3
+ size 14575
checkpoint-32907/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a2a3b3f5afc8d03be33400c918623df364335b76dfe18f9438b39def69e6ce5
3
+ size 557
checkpoint-32907/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd97e8f3ba014288a8102999321bd1599908154cd43ec34579545176f9f4d754
3
+ size 627
checkpoint-32907/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
checkpoint-32907/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-32907/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "errors": "replace",
7
+ "mask_token": "<mask>",
8
+ "model_max_length": 512,
9
+ "name_or_path": "roberta-base",
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "special_tokens_map_file": null,
13
+ "tokenizer_class": "RobertaTokenizer",
14
+ "trim_offsets": true,
15
+ "unk_token": "<unk>"
16
+ }
checkpoint-32907/trainer_state.json ADDED
@@ -0,0 +1,406 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 32907,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 1.9699152156076217e-05,
13
+ "loss": 2.2617,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 1.939526544504209e-05,
19
+ "loss": 1.242,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "learning_rate": 1.9091378734007962e-05,
25
+ "loss": 1.1706,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.18,
30
+ "learning_rate": 1.8787492022973838e-05,
31
+ "loss": 1.123,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.23,
36
+ "learning_rate": 1.8484820858783847e-05,
37
+ "loss": 1.1132,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.27,
42
+ "learning_rate": 1.818093414774972e-05,
43
+ "loss": 1.0418,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.32,
48
+ "learning_rate": 1.7877655210137663e-05,
49
+ "loss": 1.0142,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.36,
54
+ "learning_rate": 1.7573768499103536e-05,
55
+ "loss": 1.0665,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.41,
60
+ "learning_rate": 1.726988178806941e-05,
61
+ "loss": 0.9765,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.46,
66
+ "learning_rate": 1.696599507703528e-05,
67
+ "loss": 0.9874,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.5,
72
+ "learning_rate": 1.6662108366001154e-05,
73
+ "loss": 0.9705,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.55,
78
+ "learning_rate": 1.635822165496703e-05,
79
+ "loss": 0.9824,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.59,
84
+ "learning_rate": 1.6054334943932903e-05,
85
+ "loss": 0.939,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.64,
90
+ "learning_rate": 1.5750448232898775e-05,
91
+ "loss": 0.9501,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.68,
96
+ "learning_rate": 1.5446561521864648e-05,
97
+ "loss": 0.9427,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.73,
102
+ "learning_rate": 1.5142674810830524e-05,
103
+ "loss": 0.9215,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.77,
108
+ "learning_rate": 1.4838788099796397e-05,
109
+ "loss": 0.9106,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.82,
114
+ "learning_rate": 1.4534901388762271e-05,
115
+ "loss": 0.9276,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.87,
120
+ "learning_rate": 1.4231014677728144e-05,
121
+ "loss": 0.9249,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.91,
126
+ "learning_rate": 1.3927127966694018e-05,
127
+ "loss": 0.9114,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.96,
132
+ "learning_rate": 1.3623849029081959e-05,
133
+ "loss": 0.888,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 1.0,
138
+ "learning_rate": 1.3319962318047831e-05,
139
+ "loss": 0.8897,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 1.05,
144
+ "learning_rate": 1.3016075607013707e-05,
145
+ "loss": 0.765,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 1.09,
150
+ "learning_rate": 1.271218889597958e-05,
151
+ "loss": 0.7409,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 1.14,
156
+ "learning_rate": 1.2408302184945453e-05,
157
+ "loss": 0.7642,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 1.19,
162
+ "learning_rate": 1.2105023247333397e-05,
163
+ "loss": 0.7327,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 1.23,
168
+ "learning_rate": 1.180113653629927e-05,
169
+ "loss": 0.7358,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 1.28,
174
+ "learning_rate": 1.1497249825265142e-05,
175
+ "loss": 0.7657,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 1.32,
180
+ "learning_rate": 1.1193363114231015e-05,
181
+ "loss": 0.7393,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 1.37,
186
+ "learning_rate": 1.0889476403196887e-05,
187
+ "loss": 0.7338,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 1.41,
192
+ "learning_rate": 1.0586197465584831e-05,
193
+ "loss": 0.7693,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 1.46,
198
+ "learning_rate": 1.0282310754550704e-05,
199
+ "loss": 0.7357,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 1.5,
204
+ "learning_rate": 9.978424043516578e-06,
205
+ "loss": 0.7303,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 1.55,
210
+ "learning_rate": 9.674537332482451e-06,
211
+ "loss": 0.7295,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 1.6,
216
+ "learning_rate": 9.371258394870393e-06,
217
+ "loss": 0.7393,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 1.64,
222
+ "learning_rate": 9.067371683836268e-06,
223
+ "loss": 0.7345,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 1.69,
228
+ "learning_rate": 8.76348497280214e-06,
229
+ "loss": 0.7507,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 1.73,
234
+ "learning_rate": 8.459598261768015e-06,
235
+ "loss": 0.7195,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 1.78,
240
+ "learning_rate": 8.155711550733887e-06,
241
+ "loss": 0.7368,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 1.82,
246
+ "learning_rate": 7.852432613121828e-06,
247
+ "loss": 0.7317,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 1.87,
252
+ "learning_rate": 7.548545902087703e-06,
253
+ "loss": 0.7191,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 1.91,
258
+ "learning_rate": 7.244659191053576e-06,
259
+ "loss": 0.7102,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 1.96,
264
+ "learning_rate": 6.94077248001945e-06,
265
+ "loss": 0.7145,
266
+ "step": 21500
267
+ },
268
+ {
269
+ "epoch": 2.01,
270
+ "learning_rate": 6.636885768985323e-06,
271
+ "loss": 0.7201,
272
+ "step": 22000
273
+ },
274
+ {
275
+ "epoch": 2.05,
276
+ "learning_rate": 6.3329990579511964e-06,
277
+ "loss": 0.5908,
278
+ "step": 22500
279
+ },
280
+ {
281
+ "epoch": 2.1,
282
+ "learning_rate": 6.029720120339138e-06,
283
+ "loss": 0.5951,
284
+ "step": 23000
285
+ },
286
+ {
287
+ "epoch": 2.14,
288
+ "learning_rate": 5.725833409305011e-06,
289
+ "loss": 0.5854,
290
+ "step": 23500
291
+ },
292
+ {
293
+ "epoch": 2.19,
294
+ "learning_rate": 5.421946698270885e-06,
295
+ "loss": 0.5901,
296
+ "step": 24000
297
+ },
298
+ {
299
+ "epoch": 2.23,
300
+ "learning_rate": 5.118059987236758e-06,
301
+ "loss": 0.599,
302
+ "step": 24500
303
+ },
304
+ {
305
+ "epoch": 2.28,
306
+ "learning_rate": 4.814173276202632e-06,
307
+ "loss": 0.5736,
308
+ "step": 25000
309
+ },
310
+ {
311
+ "epoch": 2.32,
312
+ "learning_rate": 4.510894338590574e-06,
313
+ "loss": 0.6043,
314
+ "step": 25500
315
+ },
316
+ {
317
+ "epoch": 2.37,
318
+ "learning_rate": 4.207007627556448e-06,
319
+ "loss": 0.5786,
320
+ "step": 26000
321
+ },
322
+ {
323
+ "epoch": 2.42,
324
+ "learning_rate": 3.903728689944389e-06,
325
+ "loss": 0.5959,
326
+ "step": 26500
327
+ },
328
+ {
329
+ "epoch": 2.46,
330
+ "learning_rate": 3.6004497523323308e-06,
331
+ "loss": 0.5894,
332
+ "step": 27000
333
+ },
334
+ {
335
+ "epoch": 2.51,
336
+ "learning_rate": 3.2965630412982043e-06,
337
+ "loss": 0.5782,
338
+ "step": 27500
339
+ },
340
+ {
341
+ "epoch": 2.55,
342
+ "learning_rate": 2.992676330264078e-06,
343
+ "loss": 0.5738,
344
+ "step": 28000
345
+ },
346
+ {
347
+ "epoch": 2.6,
348
+ "learning_rate": 2.6887896192299514e-06,
349
+ "loss": 0.5848,
350
+ "step": 28500
351
+ },
352
+ {
353
+ "epoch": 2.64,
354
+ "learning_rate": 2.384902908195825e-06,
355
+ "loss": 0.5807,
356
+ "step": 29000
357
+ },
358
+ {
359
+ "epoch": 2.69,
360
+ "learning_rate": 2.0810161971616984e-06,
361
+ "loss": 0.5709,
362
+ "step": 29500
363
+ },
364
+ {
365
+ "epoch": 2.73,
366
+ "learning_rate": 1.7771294861275718e-06,
367
+ "loss": 0.5729,
368
+ "step": 30000
369
+ },
370
+ {
371
+ "epoch": 2.78,
372
+ "learning_rate": 1.4738505485155135e-06,
373
+ "loss": 0.5747,
374
+ "step": 30500
375
+ },
376
+ {
377
+ "epoch": 2.83,
378
+ "learning_rate": 1.169963837481387e-06,
379
+ "loss": 0.5834,
380
+ "step": 31000
381
+ },
382
+ {
383
+ "epoch": 2.87,
384
+ "learning_rate": 8.660771264472605e-07,
385
+ "loss": 0.5872,
386
+ "step": 31500
387
+ },
388
+ {
389
+ "epoch": 2.92,
390
+ "learning_rate": 5.62190415413134e-07,
391
+ "loss": 0.5577,
392
+ "step": 32000
393
+ },
394
+ {
395
+ "epoch": 2.96,
396
+ "learning_rate": 2.5830370437900756e-07,
397
+ "loss": 0.5637,
398
+ "step": 32500
399
+ }
400
+ ],
401
+ "max_steps": 32907,
402
+ "num_train_epochs": 3,
403
+ "total_flos": 6.878245175940096e+16,
404
+ "trial_name": null,
405
+ "trial_params": null
406
+ }
checkpoint-32907/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80c75984e8a107d02a5ef6eed2c6426f6ae1bd4b878b4dfdbd06300d655871a
3
+ size 3643
checkpoint-32907/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
runs/Feb16_14-41-22_thanos/1676554888.3852084/events.out.tfevents.1676554888.thanos.148842.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b2c8b3c1349f390b29b81772b823c9649304e43f1f38b79aa39ea572cde425f
3
+ size 5832
runs/Feb16_14-41-22_thanos/events.out.tfevents.1676554888.thanos.148842.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:349167078b7c239aee51d880735972a1a0f090631c74efb563297a688ce1be65
3
+ size 14709
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": "<s>",
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "errors": "replace",
7
+ "mask_token": "<mask>",
8
+ "model_max_length": 512,
9
+ "name_or_path": "roberta-base",
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "special_tokens_map_file": null,
13
+ "tokenizer_class": "RobertaTokenizer",
14
+ "trim_offsets": true,
15
+ "unk_token": "<unk>"
16
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d80c75984e8a107d02a5ef6eed2c6426f6ae1bd4b878b4dfdbd06300d655871a
3
+ size 3643
vocab.json ADDED
The diff for this file is too large to render. See raw diff