Upload 8 files
Browse files- nanoDist18/config.json +26 -0
- nanoDist18/optimizer.pt +3 -0
- nanoDist18/pytorch_model.bin +3 -0
- nanoDist18/rng_state.pth +3 -0
- nanoDist18/scaler.pt +3 -0
- nanoDist18/scheduler.pt +3 -0
- nanoDist18/trainer_state.json +58 -0
- nanoDist18/training_args.bin +3 -0
nanoDist18/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"RobertaForMaskedLM"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"bos_token_id": 0,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 156,
|
16 |
+
"model_type": "roberta",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 1,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.24.0",
|
23 |
+
"type_vocab_size": 1,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 26
|
26 |
+
}
|
nanoDist18/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a767a1603187aee67d00f21574973622e0305ea38ed47df49be4456a18ee749f
|
3 |
+
size 686428357
|
nanoDist18/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad579a984de9c19aef896f0bd3770834b84023d0412aee795471ab0ceeffd341
|
3 |
+
size 343225593
|
nanoDist18/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8161de00b7e4b8b84bf4e81918ab41457046704246e186132ef5b25525c1e979
|
3 |
+
size 15597
|
nanoDist18/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:987e725c68fbad78a31692df7a404121c4a7d55370f9c0db127b90fa8c7c19b3
|
3 |
+
size 557
|
nanoDist18/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28807a4920832c08a4fe739fbabad8636da10fd1f39875f013f6d90a3e33e21d
|
3 |
+
size 627
|
nanoDist18/trainer_state.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.0,
|
5 |
+
"global_step": 139704,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 1.0,
|
12 |
+
"learning_rate": 8.359719978459882e-05,
|
13 |
+
"loss": 0.6809,
|
14 |
+
"step": 46568
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 1.0,
|
18 |
+
"eval_loss": 0.3213633894920349,
|
19 |
+
"eval_runtime": 420.6563,
|
20 |
+
"eval_samples_per_second": 2182.138,
|
21 |
+
"eval_steps_per_second": 11.366,
|
22 |
+
"step": 46568
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"epoch": 2.0,
|
26 |
+
"learning_rate": 6.270866989768444e-05,
|
27 |
+
"loss": 0.4706,
|
28 |
+
"step": 93136
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"epoch": 2.0,
|
32 |
+
"eval_loss": 0.2747191786766052,
|
33 |
+
"eval_runtime": 418.1806,
|
34 |
+
"eval_samples_per_second": 2195.056,
|
35 |
+
"eval_steps_per_second": 11.433,
|
36 |
+
"step": 93136
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"epoch": 3.0,
|
40 |
+
"learning_rate": 4.1818793753365646e-05,
|
41 |
+
"loss": 0.4215,
|
42 |
+
"step": 139704
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"epoch": 3.0,
|
46 |
+
"eval_loss": 0.2559063732624054,
|
47 |
+
"eval_runtime": 418.7155,
|
48 |
+
"eval_samples_per_second": 2192.252,
|
49 |
+
"eval_steps_per_second": 11.418,
|
50 |
+
"step": 139704
|
51 |
+
}
|
52 |
+
],
|
53 |
+
"max_steps": 232840,
|
54 |
+
"num_train_epochs": 5,
|
55 |
+
"total_flos": 2.1227226187002455e+18,
|
56 |
+
"trial_name": null,
|
57 |
+
"trial_params": null
|
58 |
+
}
|
nanoDist18/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49da3e89dac24b684080ef6f785d204e946deaf066e378059d8d4eb8b0a67934
|
3 |
+
size 3387
|