Kira-Floris committed
Commit fb00fc9
1 Parent(s): 5603086

Training in progress, epoch 6

logs/events.out.tfevents.1719310313.852b1e905a9a.223.5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d753cd9c975379dd90b5d356886d24298df7ec6c21398a39b744289b5bd80a24
-size 7565
+oid sha256:6b7c27678c96b55268ae20101a3dcdcabed72a7e35b5314e74a49c2627c96740
+size 8453
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97d865cd86bbd82a83507568d3aedd623e054710a155c7e8e3cddf0ba2b7c970
+oid sha256:1d47eed6b6c10b89046c5579792754bc1f6f31fc160278e47e2d473bc77094d3
 size 17549312
run-0/checkpoint-3162/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e66d9522c7e5449c5f8219463267e60b5ec3f1017664ef4e316e07a78f5f39e
+oid sha256:1d47eed6b6c10b89046c5579792754bc1f6f31fc160278e47e2d473bc77094d3
 size 17549312
run-0/checkpoint-3162/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83703cd4bf2a473fb393d524ea8bded4014b2b81fcc50abc39cd466f11e50108
+oid sha256:d12374f16343dd9854da47ea59df55bcfc6c07f511e63ee993f70c9343f14ab5
 size 35123898
run-0/checkpoint-3162/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0eb013781aa27303ed3472ad10f082332665efcab61c356458c316242fc903cb
+oid sha256:4be833998b5d5f182cff6fbb7697231be6c352b034f0ce1e3cb32add5329f3a2
 size 1064
run-0/checkpoint-3162/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
 {
-  "best_metric": 0.7947247706422018,
-  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-3162",
+  "best_metric": 0.801605504587156,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-2108",
   "epoch": 6.0,
   "eval_steps": 500,
   "global_step": 3162,
@@ -10,105 +10,105 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 5.95961856842041,
-      "learning_rate": 9.55389368279823e-06,
-      "loss": 1.5369,
+      "grad_norm": 12.377176284790039,
+      "learning_rate": 1.5980854192157344e-05,
+      "loss": 3.0218,
       "step": 527
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7339449541284404,
-      "eval_loss": 1.2773902416229248,
-      "eval_runtime": 2.5957,
-      "eval_samples_per_second": 335.939,
-      "eval_steps_per_second": 2.697,
+      "eval_accuracy": 0.7672018348623854,
+      "eval_loss": 2.191861391067505,
+      "eval_runtime": 2.5906,
+      "eval_samples_per_second": 336.603,
+      "eval_steps_per_second": 2.702,
       "step": 527
     },
     {
       "epoch": 2.0,
-      "grad_norm": 15.252978324890137,
-      "learning_rate": 8.492349940265094e-06,
-      "loss": 1.2159,
+      "grad_norm": Infinity,
+      "learning_rate": 1.2796813034327135e-05,
+      "loss": 2.0797,
       "step": 1054
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.7786697247706422,
-      "eval_loss": 1.022659182548523,
-      "eval_runtime": 2.5741,
-      "eval_samples_per_second": 338.753,
-      "eval_steps_per_second": 2.719,
+      "eval_accuracy": 0.7809633027522935,
+      "eval_loss": 1.8230279684066772,
+      "eval_runtime": 2.5719,
+      "eval_samples_per_second": 339.043,
+      "eval_steps_per_second": 2.722,
       "step": 1054
     },
     {
       "epoch": 3.0,
-      "grad_norm": 18.01114845275879,
-      "learning_rate": 7.430806197731956e-06,
-      "loss": 1.0132,
+      "grad_norm": 40.96967315673828,
+      "learning_rate": 9.600642195895665e-06,
+      "loss": 1.697,
       "step": 1581
     },
     {
       "epoch": 3.0,
-      "eval_accuracy": 0.7844036697247706,
-      "eval_loss": 0.9622328281402588,
-      "eval_runtime": 2.5865,
-      "eval_samples_per_second": 337.137,
-      "eval_steps_per_second": 2.706,
+      "eval_accuracy": 0.786697247706422,
+      "eval_loss": 1.7212010622024536,
+      "eval_runtime": 2.549,
+      "eval_samples_per_second": 342.099,
+      "eval_steps_per_second": 2.746,
       "step": 1581
     },
     {
       "epoch": 4.0,
-      "grad_norm": 15.912079811096191,
-      "learning_rate": 6.371276769700781e-06,
-      "loss": 0.9206,
+      "grad_norm": 35.09682846069336,
+      "learning_rate": 6.4044713574641965e-06,
+      "loss": 1.5166,
       "step": 2108
     },
     {
       "epoch": 4.0,
-      "eval_accuracy": 0.7901376146788991,
-      "eval_loss": 0.9278557896614075,
-      "eval_runtime": 2.5808,
-      "eval_samples_per_second": 337.878,
-      "eval_steps_per_second": 2.712,
+      "eval_accuracy": 0.801605504587156,
+      "eval_loss": 1.6234298944473267,
+      "eval_runtime": 2.5722,
+      "eval_samples_per_second": 339.013,
+      "eval_steps_per_second": 2.721,
       "step": 2108
     },
     {
       "epoch": 5.0,
-      "grad_norm": 10.149016380310059,
-      "learning_rate": 5.3097330271676446e-06,
-      "loss": 0.8564,
+      "grad_norm": 10.431685447692871,
+      "learning_rate": 3.208300519032727e-06,
+      "loss": 1.4053,
       "step": 2635
     },
     {
       "epoch": 5.0,
-      "eval_accuracy": 0.7901376146788991,
-      "eval_loss": 0.9079629182815552,
-      "eval_runtime": 2.5658,
-      "eval_samples_per_second": 339.852,
-      "eval_steps_per_second": 2.728,
+      "eval_accuracy": 0.7993119266055045,
+      "eval_loss": 1.5793094635009766,
+      "eval_runtime": 2.5626,
+      "eval_samples_per_second": 340.279,
+      "eval_steps_per_second": 2.732,
       "step": 2635
     },
     {
       "epoch": 6.0,
-      "grad_norm": 16.081409454345703,
-      "learning_rate": 4.248189284634507e-06,
-      "loss": 0.8139,
+      "grad_norm": 31.159494400024414,
+      "learning_rate": 1.2129680601257947e-08,
+      "loss": 1.3567,
       "step": 3162
     },
     {
       "epoch": 6.0,
-      "eval_accuracy": 0.7947247706422018,
-      "eval_loss": 0.8956273794174194,
-      "eval_runtime": 2.6131,
-      "eval_samples_per_second": 333.701,
-      "eval_steps_per_second": 2.679,
+      "eval_accuracy": 0.8004587155963303,
+      "eval_loss": 1.5706629753112793,
+      "eval_runtime": 2.5683,
+      "eval_samples_per_second": 339.518,
+      "eval_steps_per_second": 2.725,
       "step": 3162
     }
   ],
   "logging_steps": 500,
-  "max_steps": 5270,
+  "max_steps": 3162,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
+  "num_train_epochs": 6,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -117,7 +117,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
@@ -126,9 +126,9 @@
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.27608826195592573,
-    "learning_rate": 1.0615437425331367e-05,
-    "num_train_epochs": 10,
-    "temperature": 2
+    "alpha": 0.275210688972374,
+    "learning_rate": 1.9177025030588814e-05,
+    "num_train_epochs": 6,
+    "temperature": 6
   }
 }
run-0/checkpoint-3162/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8b6a60f7b85b38fa45cddf1a417ee51250fe5822237403416bf2406ff2cdb84
+oid sha256:15e7b4feae857373f91378a3d0efc15d0ec396bfba71e74c965086843aa6acf4
 size 5176