Kira-Floris committed
Commit 07d8f79
1 Parent(s): 3402760

Training in progress, epoch 3

logs/events.out.tfevents.1719316225.49cbd00d2005.382.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cd44486db105a3795a27bd7e66817354b0b0c46c44ae0bd71e8c1ac02b9c2e4
-size 5961
+oid sha256:8f35900daa174a6a5fef76c0a33cef30b6c69a9bdc6be0978c13e94e7dc7999d
+size 6495
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9880a4d6b160b708dee866b97b67f4767461cdec03dbdef71638961456f15266
+oid sha256:088089bcc78f6a495d727bfd4f2722ab86ef44dde434e0dd36c7765a2949601f
 size 17549312
run-0/checkpoint-1581/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:053f4e74e37bd46e97455119628ae6d07b968538e4db43c7805f3b05ff409a39
+oid sha256:088089bcc78f6a495d727bfd4f2722ab86ef44dde434e0dd36c7765a2949601f
 size 17549312
run-0/checkpoint-1581/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa80d5ade6ff2f3070e08ba4f385965ae39d536ef54fd0fc63a64eb79be2dd74
+oid sha256:d2eab7a50471167ccdec8f251e96d92f16211722a1ba3ce445def961439cd852
 size 35123898
run-0/checkpoint-1581/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b63263b846c1da873d89d97103d702d45aa231d1b709bf63a6038446c1306d74
+oid sha256:6b7f8a1250df84a05c873d7a11e46287a5e3ea18e01e3d4c7581abd4866e3ed4
 size 1064
run-0/checkpoint-1581/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
 {
-  "best_metric": 0.786697247706422,
-  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1581",
+  "best_metric": 0.8176605504587156,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-527",
   "epoch": 3.0,
   "eval_steps": 500,
   "global_step": 1581,
@@ -10,57 +10,57 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "grad_norm": 12.377176284790039,
-      "learning_rate": 1.5980854192157344e-05,
-      "loss": 3.0218,
+      "grad_norm": 15.690781593322754,
+      "learning_rate": 0.0004329892843734803,
+      "loss": 1.29,
       "step": 527
     },
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.7672018348623854,
-      "eval_loss": 2.191861391067505,
-      "eval_runtime": 2.5906,
-      "eval_samples_per_second": 336.603,
-      "eval_steps_per_second": 2.702,
+      "eval_accuracy": 0.8176605504587156,
+      "eval_loss": 1.5847134590148926,
+      "eval_runtime": 2.4131,
+      "eval_samples_per_second": 361.36,
+      "eval_steps_per_second": 2.901,
       "step": 527
     },
     {
       "epoch": 2.0,
-      "grad_norm": Infinity,
-      "learning_rate": 1.2796813034327135e-05,
-      "loss": 2.0797,
+      "grad_norm": 3.8686888217926025,
+      "learning_rate": 0.0002886595229156535,
+      "loss": 0.5557,
       "step": 1054
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.7809633027522935,
-      "eval_loss": 1.8230279684066772,
-      "eval_runtime": 2.5719,
-      "eval_samples_per_second": 339.043,
-      "eval_steps_per_second": 2.722,
+      "eval_accuracy": 0.8142201834862385,
+      "eval_loss": 1.6788822412490845,
+      "eval_runtime": 2.4261,
+      "eval_samples_per_second": 359.428,
+      "eval_steps_per_second": 2.885,
       "step": 1054
     },
     {
       "epoch": 3.0,
-      "grad_norm": 40.96967315673828,
-      "learning_rate": 9.600642195895665e-06,
-      "loss": 1.697,
+      "grad_norm": 6.820591926574707,
+      "learning_rate": 0.00014432976145782676,
+      "loss": 0.3571,
       "step": 1581
     },
     {
       "epoch": 3.0,
-      "eval_accuracy": 0.786697247706422,
-      "eval_loss": 1.7212010622024536,
-      "eval_runtime": 2.549,
-      "eval_samples_per_second": 342.099,
-      "eval_steps_per_second": 2.746,
+      "eval_accuracy": 0.8142201834862385,
+      "eval_loss": 1.6854803562164307,
+      "eval_runtime": 2.4219,
+      "eval_samples_per_second": 360.053,
+      "eval_steps_per_second": 2.89,
       "step": 1581
     }
   ],
   "logging_steps": 500,
-  "max_steps": 3162,
+  "max_steps": 2108,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 6,
+  "num_train_epochs": 4,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -78,9 +78,9 @@
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.275210688972374,
-    "learning_rate": 1.9177025030588814e-05,
-    "num_train_epochs": 6,
+    "alpha": 0.19981548442581198,
+    "learning_rate": 0.000577319045831307,
+    "num_train_epochs": 4,
     "temperature": 6
   }
 }
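Note: the "trial_params" block above records the values sampled for this hyperparameter-search trial. As a rough illustration only (this is not the repository's training code; the function name and exact weighting are assumptions), "alpha" and "temperature" are conventionally combined in a soft-target distillation loss along these lines:

# Hedged sketch of how distillation trial params like alpha and temperature
# are typically used. NOT the actual training code behind this commit.
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      alpha=0.19981548442581198, temperature=6.0):
    # Hard-label cross-entropy on the ground-truth SST-2 labels.
    ce = F.cross_entropy(student_logits, labels)
    # Soft-target KL divergence between temperature-scaled distributions,
    # rescaled by T^2 as is standard in distillation.
    kd = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * (temperature ** 2)
    # alpha balances the two terms (the convention varies between implementations).
    return alpha * ce + (1.0 - alpha) * kd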
run-0/checkpoint-1581/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15e7b4feae857373f91378a3d0efc15d0ec396bfba71e74c965086843aa6acf4
+oid sha256:9b553ab9101fed6c5a9e75fca4cb81df81514d2d3f8ff94edc0c546949c119f9
 size 5176
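Note: every binary file in this commit is tracked with Git LFS, so the diffs above show pointer files (version, oid sha256, size) rather than file contents. As a minimal sketch, assuming hypothetical local paths and using only the Python standard library, a downloaded file can be checked against its pointer like this:

# Hedged sketch: verify a local file against its Git LFS pointer.
# The paths in the example call are placeholders, not files from this repo.
import hashlib
from pathlib import Path

def read_pointer(pointer_path: str) -> dict:
    # Parse the key/value lines of a Git LFS pointer file.
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def matches_pointer(data_path: str, pointer_path: str) -> bool:
    pointer = read_pointer(pointer_path)
    expected_oid = pointer["oid"].removeprefix("sha256:")
    expected_size = int(pointer["size"])
    blob = Path(data_path).read_bytes()
    return (len(blob) == expected_size
            and hashlib.sha256(blob).hexdigest() == expected_oid)

# Example (hypothetical paths):
# matches_pointer("model.safetensors", "model.safetensors.pointer")

If both the size and the sha256 digest match, the local file corresponds to the object the pointer references.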