Kira-Floris committed on
Commit
3402760
1 Parent(s): d6cdca8

Training in progress, epoch 2

Browse files
logs/events.out.tfevents.1719316225.49cbd00d2005.382.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:298724a4bb557672c166bc1e25254912866c59cdddf96831c2184896ca9bdd45
3
- size 5427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cd44486db105a3795a27bd7e66817354b0b0c46c44ae0bd71e8c1ac02b9c2e4
3
+ size 5961
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e4101e13540b9ca54dbd8ddf761e2b1b8af8108fb9147967fcd9ee7dd0b7ff1
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9880a4d6b160b708dee866b97b67f4767461cdec03dbdef71638961456f15266
3
  size 17549312
run-0/checkpoint-1054/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc0fee5c27b13a0a9c632d2aac5f70e2a09379e669796176adfed370c09bec74
3
  size 17549312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9880a4d6b160b708dee866b97b67f4767461cdec03dbdef71638961456f15266
3
  size 17549312
run-0/checkpoint-1054/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:180bafd9040bf7f35eca41800b8bc8fea98cfd11101725956e8708ac27fa2b2f
3
  size 35123898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d8a608e526473174b7c31c2121364dd63d55f262ae5e4c78252908e6e661c4
3
  size 35123898
run-0/checkpoint-1054/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:100464d33f6556fd1f8f558519b1030d76a9ca5f1f9e07ec8e190d748723f68d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a77675298fd65dc14bb0a83836346073a553ae69d629ef591046deeaac3870d
3
  size 1064
run-0/checkpoint-1054/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.7809633027522935,
3
- "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-1054",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 1054,
@@ -10,41 +10,41 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 12.377176284790039,
14
- "learning_rate": 1.5980854192157344e-05,
15
- "loss": 3.0218,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.7672018348623854,
21
- "eval_loss": 2.191861391067505,
22
- "eval_runtime": 2.5906,
23
- "eval_samples_per_second": 336.603,
24
- "eval_steps_per_second": 2.702,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": Infinity,
30
- "learning_rate": 1.2796813034327135e-05,
31
- "loss": 2.0797,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.7809633027522935,
37
- "eval_loss": 1.8230279684066772,
38
- "eval_runtime": 2.5719,
39
- "eval_samples_per_second": 339.043,
40
- "eval_steps_per_second": 2.722,
41
  "step": 1054
42
  }
43
  ],
44
  "logging_steps": 500,
45
- "max_steps": 3162,
46
  "num_input_tokens_seen": 0,
47
- "num_train_epochs": 6,
48
  "save_steps": 500,
49
  "stateful_callbacks": {
50
  "TrainerControl": {
@@ -62,9 +62,9 @@
62
  "train_batch_size": 128,
63
  "trial_name": null,
64
  "trial_params": {
65
- "alpha": 0.275210688972374,
66
- "learning_rate": 1.9177025030588814e-05,
67
- "num_train_epochs": 6,
68
  "temperature": 6
69
  }
70
  }
 
1
  {
2
+ "best_metric": 0.8176605504587156,
3
+ "best_model_checkpoint": "tiny-bert-sst2-distilled/run-0/checkpoint-527",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 1054,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 15.690781593322754,
14
+ "learning_rate": 0.0004329892843734803,
15
+ "loss": 1.29,
16
  "step": 527
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8176605504587156,
21
+ "eval_loss": 1.5847134590148926,
22
+ "eval_runtime": 2.4131,
23
+ "eval_samples_per_second": 361.36,
24
+ "eval_steps_per_second": 2.901,
25
  "step": 527
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 3.8686888217926025,
30
+ "learning_rate": 0.0002886595229156535,
31
+ "loss": 0.5557,
32
  "step": 1054
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.8142201834862385,
37
+ "eval_loss": 1.6788822412490845,
38
+ "eval_runtime": 2.4261,
39
+ "eval_samples_per_second": 359.428,
40
+ "eval_steps_per_second": 2.885,
41
  "step": 1054
42
  }
43
  ],
44
  "logging_steps": 500,
45
+ "max_steps": 2108,
46
  "num_input_tokens_seen": 0,
47
+ "num_train_epochs": 4,
48
  "save_steps": 500,
49
  "stateful_callbacks": {
50
  "TrainerControl": {
 
62
  "train_batch_size": 128,
63
  "trial_name": null,
64
  "trial_params": {
65
+ "alpha": 0.19981548442581198,
66
+ "learning_rate": 0.000577319045831307,
67
+ "num_train_epochs": 4,
68
  "temperature": 6
69
  }
70
  }
run-0/checkpoint-1054/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15e7b4feae857373f91378a3d0efc15d0ec396bfba71e74c965086843aa6acf4
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b553ab9101fed6c5a9e75fca4cb81df81514d2d3f8ff94edc0c546949c119f9
3
  size 5176