Kira-Floris commited on
Commit
c959a8e
1 Parent(s): 07d8f79

Training in progress, epoch 1

Browse files
logs/events.out.tfevents.1719316225.49cbd00d2005.382.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f35900daa174a6a5fef76c0a33cef30b6c69a9bdc6be0978c13e94e7dc7999d
3
- size 6495
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b2e27b4e3928d3380f15f7def860daf62f2d02843b6dba0fcbc506062197843
3
+ size 7383
logs/events.out.tfevents.1719316946.49cbd00d2005.382.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32027f51491766380aa4b56b9780645239e4a1be2cee3a370fbedf1eeff9b8b9
3
+ size 5428
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:088089bcc78f6a495d727bfd4f2722ab86ef44dde434e0dd36c7765a2949601f
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27cee0faa93c8ebfdb171e60acd1f01bcf52efd270df1bebe4847e474aab2212
3
  size 17549312
run-0/checkpoint-2108/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3270a719696d1eac1554368a9133f99df33c8a3389f4ea80fa8a91759101b0db
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fbddd20b0b4ede2627f3d04a5320f4552ddffca9eeb9250d451b2033379bc42
3
  size 17549312
run-0/checkpoint-2108/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65c25cbb73b435a7d4ec1efcc8c69b5e837bd4a7f4f0ad0a5014110a2ee5907a
3
  size 35123898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66c061125f90063707894535c74eb3be5cb40e6ea90d6e2aa3bc61133a80551e
3
  size 35123898
run-0/checkpoint-2108/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34f8a6b9de99155238ee5c3df77c36dc9848f72745e188d461083c5c1c2b2802
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:554ff5e83543bed3776599548ea65cf94cd5336e788b551523284db7e73b3034
3
  size 1064
run-0/checkpoint-2108/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.801605504587156,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-2108",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
@@ -10,73 +10,73 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 12.377176284790039,
14
- "learning_rate": 1.5980854192157344e-05,
15
- "loss": 3.0218,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7672018348623854,
21
- "eval_loss": 2.191861391067505,
22
- "eval_runtime": 2.5906,
23
- "eval_samples_per_second": 336.603,
24
- "eval_steps_per_second": 2.702,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": Infinity,
30
- "learning_rate": 1.2796813034327135e-05,
31
- "loss": 2.0797,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.7809633027522935,
37
- "eval_loss": 1.8230279684066772,
38
- "eval_runtime": 2.5719,
39
- "eval_samples_per_second": 339.043,
40
- "eval_steps_per_second": 2.722,
41
  "step": 1054
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 40.96967315673828,
46
- "learning_rate": 9.600642195895665e-06,
47
- "loss": 1.697,
48
  "step": 1581
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.786697247706422,
53
- "eval_loss": 1.7212010622024536,
54
- "eval_runtime": 2.549,
55
- "eval_samples_per_second": 342.099,
56
- "eval_steps_per_second": 2.746,
57
  "step": 1581
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 35.09682846069336,
62
- "learning_rate": 6.4044713574641965e-06,
63
- "loss": 1.5166,
64
  "step": 2108
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.801605504587156,
69
- "eval_loss": 1.6234298944473267,
70
- "eval_runtime": 2.5722,
71
- "eval_samples_per_second": 339.013,
72
- "eval_steps_per_second": 2.721,
73
  "step": 2108
74
  }
75
  ],
76
  "logging_steps": 500,
77
- "max_steps": 3162,
78
  "num_input_tokens_seen": 0,
79
- "num_train_epochs": 6,
80
  "save_steps": 500,
81
  "stateful_callbacks": {
82
  "TrainerControl": {
@@ -85,7 +85,7 @@
85
  "should_evaluate": false,
86
  "should_log": false,
87
  "should_save": true,
88
- "should_training_stop": false
89
  },
90
  "attributes": {}
91
  }
@@ -94,9 +94,9 @@
94
  "train_batch_size": 128,
95
  "trial_name": null,
96
  "trial_params": {
97
- "alpha": 0.275210688972374,
98
- "learning_rate": 1.9177025030588814e-05,
99
- "num_train_epochs": 6,
100
  "temperature": 6
101
  }
102
  }
 
1
  {
2
+ "best_metric": 0.8245412844036697,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-2108",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 15.690781593322754,
14
+ "learning_rate": 0.0004329892843734803,
15
+ "loss": 1.29,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8176605504587156,
21
+ "eval_loss": 1.5847134590148926,
22
+ "eval_runtime": 2.4131,
23
+ "eval_samples_per_second": 361.36,
24
+ "eval_steps_per_second": 2.901,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 3.8686888217926025,
30
+ "learning_rate": 0.0002886595229156535,
31
+ "loss": 0.5557,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.8142201834862385,
37
+ "eval_loss": 1.6788822412490845,
38
+ "eval_runtime": 2.4261,
39
+ "eval_samples_per_second": 359.428,
40
+ "eval_steps_per_second": 2.885,
41
  "step": 1054
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "grad_norm": 6.820591926574707,
46
+ "learning_rate": 0.00014432976145782676,
47
+ "loss": 0.3571,
48
  "step": 1581
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.8142201834862385,
53
+ "eval_loss": 1.6854803562164307,
54
+ "eval_runtime": 2.4219,
55
+ "eval_samples_per_second": 360.053,
56
+ "eval_steps_per_second": 2.89,
57
  "step": 1581
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 5.2299485206604,
62
+ "learning_rate": 2.7387051510024054e-07,
63
+ "loss": 0.2568,
64
  "step": 2108
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.8245412844036697,
69
+ "eval_loss": 1.7129724025726318,
70
+ "eval_runtime": 2.3655,
71
+ "eval_samples_per_second": 368.635,
72
+ "eval_steps_per_second": 2.959,
73
  "step": 2108
74
  }
75
  ],
76
  "logging_steps": 500,
77
+ "max_steps": 2108,
78
  "num_input_tokens_seen": 0,
79
+ "num_train_epochs": 4,
80
  "save_steps": 500,
81
  "stateful_callbacks": {
82
  "TrainerControl": {
 
85
  "should_evaluate": false,
86
  "should_log": false,
87
  "should_save": true,
88
+ "should_training_stop": true
89
  },
90
  "attributes": {}
91
  }
 
94
  "train_batch_size": 128,
95
  "trial_name": null,
96
  "trial_params": {
97
+ "alpha": 0.19981548442581198,
98
+ "learning_rate": 0.000577319045831307,
99
+ "num_train_epochs": 4,
100
  "temperature": 6
101
  }
102
  }
run-0/checkpoint-2108/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15e7b4feae857373f91378a3d0efc15d0ec396bfba71e74c965086843aa6acf4
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b553ab9101fed6c5a9e75fca4cb81df81514d2d3f8ff94edc0c546949c119f9
3
  size 5176
run-1/checkpoint-527/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d3cdebd4645b1d3aedb598d74b637f5ed037f958b572ee80608133aee3a9976
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27cee0faa93c8ebfdb171e60acd1f01bcf52efd270df1bebe4847e474aab2212
3
  size 17549312
run-1/checkpoint-527/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acf0b6e139990c3307f08d479d701e93926f0f09437c5b321cb9081a954e142e
3
  size 35123898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a48627b1c935eed466d727794d0522c059daa68f155eb56beaf8d87630343e1
3
  size 35123898
run-1/checkpoint-527/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcb5b0e469d1d3eab700f24f3dfac2d47dd96a3c9a2df7246109443b6077d1bd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85d97ac1e0817f45ac7080fb40048f460e8564b8fd99fdcde00fe437f7d077f7
3
  size 1064
run-1/checkpoint-527/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.8268348623853211,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-527",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,18 +10,18 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 20.18393898010254,
14
- "learning_rate": 0.0004400348679504635,
15
- "loss": 1.3749,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8268348623853211,
21
- "eval_loss": 1.6724135875701904,
22
- "eval_runtime": 2.5582,
23
- "eval_samples_per_second": 340.862,
24
- "eval_steps_per_second": 2.736,
25
  "step": 527
26
  }
27
  ],
@@ -46,9 +46,9 @@
46
  "train_batch_size": 128,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.22319309854701086,
50
- "learning_rate": 0.0006600523019256953,
51
  "num_train_epochs": 3,
52
- "temperature": 16
53
  }
54
  }
 
1
  {
2
+ "best_metric": 0.8130733944954128,
3
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-1/checkpoint-527",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 6.467271327972412,
14
+ "learning_rate": 0.0005246601812780973,
15
+ "loss": 0.349,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8130733944954128,
21
+ "eval_loss": 0.5175108313560486,
22
+ "eval_runtime": 2.3764,
23
+ "eval_samples_per_second": 366.943,
24
+ "eval_steps_per_second": 2.946,
25
  "step": 527
26
  }
27
  ],
 
46
  "train_batch_size": 128,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.9438943885488221,
50
+ "learning_rate": 0.0007869902719171459,
51
  "num_train_epochs": 3,
52
+ "temperature": 2
53
  }
54
  }
run-1/checkpoint-527/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee9990afd96f204321e9e33fc4071f73d76355111015fd2ec723f4a4e4849ce7
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13747d4a4002463522d34a00fd1d294fed1696984634001cefecc3ef3eb702bf
3
  size 5176
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b553ab9101fed6c5a9e75fca4cb81df81514d2d3f8ff94edc0c546949c119f9
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13747d4a4002463522d34a00fd1d294fed1696984634001cefecc3ef3eb702bf
3
  size 5176