stuser2023 committed on
Commit
9f000f3
1 Parent(s): c128b2d

Training in progress, epoch 4

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca8fe9f4d3b1eda4a732430841ce9a13e294d4165cbbeecd8e420652e2ca1d59
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fab592c67f9aa38c48d9dab26dc2834ae1fde2e535f26faed2bb0a9f4e0cef2
3
  size 267832560
run-1/checkpoint-1072/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.45.0",
24
  "vocab_size": 30522
25
  }
run-1/checkpoint-1072/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9992887f6907f8916987f3e3d5b5cd5f9a668b44cebeae398a79e142aced221
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fab592c67f9aa38c48d9dab26dc2834ae1fde2e535f26faed2bb0a9f4e0cef2
3
  size 267832560
run-1/checkpoint-1072/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fc9517b39355720ca587337ba7563051682d885fb178f3d079846331368417c
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d64bd1b3cd64174fcf21cc7191339a20d65d127c808ebb360e7cbfed0e4ab6f
3
  size 535727290
run-1/checkpoint-1072/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32ddadbd4e9ce4478d8580a2b23d53d37db7c6d53eeed56612b957a8c00b76dc
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be557abd511148330f4618597443cd2d735141778483b075e669b8068a498e5
3
+ size 14244
run-1/checkpoint-1072/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3765e02ab4bcae5228f1b22b52d5b68ecef8a19a4fc159b8e8f029723c547b6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ee211d1f52c748ba0df37e854b3aeed772675565bd36b91a1cfe786c1df4766
3
  size 1064
run-1/checkpoint-1072/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.46849580082104064,
3
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-804",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 1072,
@@ -10,52 +10,52 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.5035493969917297,
14
- "eval_matthews_correlation": 0.4141806938515634,
15
- "eval_runtime": 0.7419,
16
- "eval_samples_per_second": 1405.832,
17
- "eval_steps_per_second": 88.96,
18
  "step": 268
19
  },
20
  {
21
- "epoch": 1.87,
22
- "grad_norm": 4.896795749664307,
23
- "learning_rate": 5.453884401804003e-06,
24
- "loss": 0.4997,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_loss": 0.4967799484729767,
30
- "eval_matthews_correlation": 0.44251427534495513,
31
- "eval_runtime": 0.8433,
32
- "eval_samples_per_second": 1236.743,
33
- "eval_steps_per_second": 78.26,
34
  "step": 536
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_loss": 0.49384433031082153,
39
- "eval_matthews_correlation": 0.46849580082104064,
40
- "eval_runtime": 1.1521,
41
- "eval_samples_per_second": 905.341,
42
- "eval_steps_per_second": 57.289,
43
  "step": 804
44
  },
45
  {
46
- "epoch": 3.73,
47
- "grad_norm": 8.912487030029297,
48
- "learning_rate": 6.865029316956087e-07,
49
- "loss": 0.3522,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_loss": 0.5385918021202087,
55
- "eval_matthews_correlation": 0.430812413056651,
56
- "eval_runtime": 0.8323,
57
- "eval_samples_per_second": 1253.098,
58
- "eval_steps_per_second": 79.295,
59
  "step": 1072
60
  }
61
  ],
@@ -64,13 +64,25 @@
64
  "num_input_tokens_seen": 0,
65
  "num_train_epochs": 4,
66
  "save_steps": 500,
67
- "total_flos": 192081608932020.0,
 
 
 
 
 
 
 
 
 
 
 
 
68
  "train_batch_size": 32,
69
  "trial_name": null,
70
  "trial_params": {
71
- "learning_rate": 1.0221265871912396e-05,
72
  "num_train_epochs": 4,
73
  "per_device_train_batch_size": 32,
74
- "seed": 8
75
  }
76
  }
 
1
  {
2
+ "best_metric": 0.44997112166326025,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-1072",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 1072,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5182795524597168,
14
+ "eval_matthews_correlation": 0.3637991719695883,
15
+ "eval_runtime": 0.8749,
16
+ "eval_samples_per_second": 1192.137,
17
+ "eval_steps_per_second": 75.437,
18
  "step": 268
19
  },
20
  {
21
+ "epoch": 1.8656716417910446,
22
+ "grad_norm": 5.394637107849121,
23
+ "learning_rate": 4.082371431187213e-06,
24
+ "loss": 0.5021,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_loss": 0.5294386148452759,
30
+ "eval_matthews_correlation": 0.44429652763655303,
31
+ "eval_runtime": 0.8769,
32
+ "eval_samples_per_second": 1189.431,
33
+ "eval_steps_per_second": 75.266,
34
  "step": 536
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_loss": 0.4963017404079437,
39
+ "eval_matthews_correlation": 0.446244823436024,
40
+ "eval_runtime": 0.9086,
41
+ "eval_samples_per_second": 1147.897,
42
+ "eval_steps_per_second": 72.638,
43
  "step": 804
44
  },
45
  {
46
+ "epoch": 3.7313432835820897,
47
+ "grad_norm": 8.369268417358398,
48
+ "learning_rate": 5.138649353941947e-07,
49
+ "loss": 0.379,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_loss": 0.5046903491020203,
55
+ "eval_matthews_correlation": 0.44997112166326025,
56
+ "eval_runtime": 1.1052,
57
+ "eval_samples_per_second": 943.695,
58
+ "eval_steps_per_second": 59.716,
59
  "step": 1072
60
  }
61
  ],
 
64
  "num_input_tokens_seen": 0,
65
  "num_train_epochs": 4,
66
  "save_steps": 500,
67
+ "stateful_callbacks": {
68
+ "TrainerControl": {
69
+ "args": {
70
+ "should_epoch_stop": false,
71
+ "should_evaluate": false,
72
+ "should_log": false,
73
+ "should_save": true,
74
+ "should_training_stop": true
75
+ },
76
+ "attributes": {}
77
+ }
78
+ },
79
+ "total_flos": 191790801595908.0,
80
  "train_batch_size": 32,
81
  "trial_name": null,
82
  "trial_params": {
83
+ "learning_rate": 7.650877926980232e-06,
84
  "num_train_epochs": 4,
85
  "per_device_train_batch_size": 32,
86
+ "seed": 28
87
  }
88
  }
run-1/checkpoint-1072/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55916040b5530f85fbbef38cd42dcd18934cd6b77c8a8a1a110fdc3d400f34d0
3
+ size 5304
run-1/checkpoint-536/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.45.0",
24
  "vocab_size": 30522
25
  }
run-1/checkpoint-536/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:362d9c0cd2587fbb0107cad0126e6f5d8cff717dff52b10facf7769e2fb5712d
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d47c92c5e36c8a03fde5daea74331ab41f371d64d924be029b623731f380590
3
  size 267832560
run-1/checkpoint-536/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5212b8745eb1c4c6ef881e3d21b4023283b2bc6765d8cb3aab22e3fe10e74e9
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9302b24adc716d30cf9a7ae2582dd2e7391d94d421be52de496081e5fb9e75b
3
  size 535727290
run-1/checkpoint-536/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3c3a6d458d1ea31703d0759423a03a8df70fee2479aeb6598078341111b8adb
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9e3aaf6b37b75264fbb2c562b3c32515689962db3ac97d7b0606726a5f7b4f
3
+ size 14244
run-1/checkpoint-536/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b05a47d04eae8f5a42931783c79ad8a5775116597c4d7a406555dd541bfaf966
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b92c13dbfb54f85d196c6521b1ca7b6d5d3d5505259ca1f56b826b83152adafb
3
  size 1064
run-1/checkpoint-536/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.44251427534495513,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-536",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,27 +10,27 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.5035493969917297,
14
- "eval_matthews_correlation": 0.4141806938515634,
15
- "eval_runtime": 0.7419,
16
- "eval_samples_per_second": 1405.832,
17
- "eval_steps_per_second": 88.96,
18
  "step": 268
19
  },
20
  {
21
- "epoch": 1.87,
22
- "grad_norm": 4.896795749664307,
23
- "learning_rate": 5.453884401804003e-06,
24
- "loss": 0.4997,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_loss": 0.4967799484729767,
30
- "eval_matthews_correlation": 0.44251427534495513,
31
- "eval_runtime": 0.8433,
32
- "eval_samples_per_second": 1236.743,
33
- "eval_steps_per_second": 78.26,
34
  "step": 536
35
  }
36
  ],
@@ -39,13 +39,25 @@
39
  "num_input_tokens_seen": 0,
40
  "num_train_epochs": 4,
41
  "save_steps": 500,
42
- "total_flos": 96116481642000.0,
 
 
 
 
 
 
 
 
 
 
 
 
43
  "train_batch_size": 32,
44
  "trial_name": null,
45
  "trial_params": {
46
- "learning_rate": 1.0221265871912396e-05,
47
  "num_train_epochs": 4,
48
  "per_device_train_batch_size": 32,
49
- "seed": 8
50
  }
51
  }
 
1
  {
2
+ "best_metric": 0.44429652763655303,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-536",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5182795524597168,
14
+ "eval_matthews_correlation": 0.3637991719695883,
15
+ "eval_runtime": 0.8749,
16
+ "eval_samples_per_second": 1192.137,
17
+ "eval_steps_per_second": 75.437,
18
  "step": 268
19
  },
20
  {
21
+ "epoch": 1.8656716417910446,
22
+ "grad_norm": 5.394637107849121,
23
+ "learning_rate": 4.082371431187213e-06,
24
+ "loss": 0.5021,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_loss": 0.5294386148452759,
30
+ "eval_matthews_correlation": 0.44429652763655303,
31
+ "eval_runtime": 0.8769,
32
+ "eval_samples_per_second": 1189.431,
33
+ "eval_steps_per_second": 75.266,
34
  "step": 536
35
  }
36
  ],
 
39
  "num_input_tokens_seen": 0,
40
  "num_train_epochs": 4,
41
  "save_steps": 500,
42
+ "stateful_callbacks": {
43
+ "TrainerControl": {
44
+ "args": {
45
+ "should_epoch_stop": false,
46
+ "should_evaluate": false,
47
+ "should_log": false,
48
+ "should_save": true,
49
+ "should_training_stop": false
50
+ },
51
+ "attributes": {}
52
+ }
53
+ },
54
+ "total_flos": 95730204637716.0,
55
  "train_batch_size": 32,
56
  "trial_name": null,
57
  "trial_params": {
58
+ "learning_rate": 7.650877926980232e-06,
59
  "num_train_epochs": 4,
60
  "per_device_train_batch_size": 32,
61
+ "seed": 28
62
  }
63
  }
run-1/checkpoint-536/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55916040b5530f85fbbef38cd42dcd18934cd6b77c8a8a1a110fdc3d400f34d0
3
+ size 5304
run-1/checkpoint-804/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.45.0",
24
  "vocab_size": 30522
25
  }
run-1/checkpoint-804/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4784b2cdaf1f4f2a65b67673cf9a474f50aacc6b6b52a55770494552957df28b
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a83a3ee2028627cff6d49c087171ffc2d96b89101278ec92ba41d203888f48d
3
  size 267832560
run-1/checkpoint-804/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:494174d39b2739ea5767bec872526ea91a8f51fb559ed3ecd20e2d63381bd46c
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc803f94d7404c4fb2922a20bca972cf6c37b31c3ce75f8ac3fbd77275346de3
3
  size 535727290
run-1/checkpoint-804/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65c4502cc2c96e2c95cfbd885846a4486d9bbe6f82fd2b8441148c27de597eb2
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:409b9078199aaff19d2f454e6d0a0a6b622f5595cbc7effb5585b7100c2cc7d8
3
+ size 14244
run-1/checkpoint-804/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76cbf4faf8458eeab8085fe97b4e0f9442c5aa8273abf11a8f8bb22a481d6a86
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:242b87443cb49e20de4211b0f90b9a4c63be961056ed1c678ddb6e4e0d78afe2
3
  size 1064
run-1/checkpoint-804/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.46849580082104064,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-804",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,36 +10,36 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.5035493969917297,
14
- "eval_matthews_correlation": 0.4141806938515634,
15
- "eval_runtime": 0.7419,
16
- "eval_samples_per_second": 1405.832,
17
- "eval_steps_per_second": 88.96,
18
  "step": 268
19
  },
20
  {
21
- "epoch": 1.87,
22
- "grad_norm": 4.896795749664307,
23
- "learning_rate": 5.453884401804003e-06,
24
- "loss": 0.4997,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_loss": 0.4967799484729767,
30
- "eval_matthews_correlation": 0.44251427534495513,
31
- "eval_runtime": 0.8433,
32
- "eval_samples_per_second": 1236.743,
33
- "eval_steps_per_second": 78.26,
34
  "step": 536
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_loss": 0.49384433031082153,
39
- "eval_matthews_correlation": 0.46849580082104064,
40
- "eval_runtime": 1.1521,
41
- "eval_samples_per_second": 905.341,
42
- "eval_steps_per_second": 57.289,
43
  "step": 804
44
  }
45
  ],
@@ -48,13 +48,25 @@
48
  "num_input_tokens_seen": 0,
49
  "num_train_epochs": 4,
50
  "save_steps": 500,
51
- "total_flos": 96116481642000.0,
 
 
 
 
 
 
 
 
 
 
 
 
52
  "train_batch_size": 32,
53
  "trial_name": null,
54
  "trial_params": {
55
- "learning_rate": 1.0221265871912396e-05,
56
  "num_train_epochs": 4,
57
  "per_device_train_batch_size": 32,
58
- "seed": 8
59
  }
60
  }
 
1
  {
2
+ "best_metric": 0.446244823436024,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-804",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5182795524597168,
14
+ "eval_matthews_correlation": 0.3637991719695883,
15
+ "eval_runtime": 0.8749,
16
+ "eval_samples_per_second": 1192.137,
17
+ "eval_steps_per_second": 75.437,
18
  "step": 268
19
  },
20
  {
21
+ "epoch": 1.8656716417910446,
22
+ "grad_norm": 5.394637107849121,
23
+ "learning_rate": 4.082371431187213e-06,
24
+ "loss": 0.5021,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_loss": 0.5294386148452759,
30
+ "eval_matthews_correlation": 0.44429652763655303,
31
+ "eval_runtime": 0.8769,
32
+ "eval_samples_per_second": 1189.431,
33
+ "eval_steps_per_second": 75.266,
34
  "step": 536
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_loss": 0.4963017404079437,
39
+ "eval_matthews_correlation": 0.446244823436024,
40
+ "eval_runtime": 0.9086,
41
+ "eval_samples_per_second": 1147.897,
42
+ "eval_steps_per_second": 72.638,
43
  "step": 804
44
  }
45
  ],
 
48
  "num_input_tokens_seen": 0,
49
  "num_train_epochs": 4,
50
  "save_steps": 500,
51
+ "stateful_callbacks": {
52
+ "TrainerControl": {
53
+ "args": {
54
+ "should_epoch_stop": false,
55
+ "should_evaluate": false,
56
+ "should_log": false,
57
+ "should_save": true,
58
+ "should_training_stop": false
59
+ },
60
+ "attributes": {}
61
+ }
62
+ },
63
+ "total_flos": 95730204637716.0,
64
  "train_batch_size": 32,
65
  "trial_name": null,
66
  "trial_params": {
67
+ "learning_rate": 7.650877926980232e-06,
68
  "num_train_epochs": 4,
69
  "per_device_train_batch_size": 32,
70
+ "seed": 28
71
  }
72
  }
run-1/checkpoint-804/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
- size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55916040b5530f85fbbef38cd42dcd18934cd6b77c8a8a1a110fdc3d400f34d0
3
+ size 5304
runs/Sep26_13-34-33_f266a1036e13/events.out.tfevents.1727358189.f266a1036e13.258.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebf518373c6afe38ee8e31b1e4a6d789d20ac210800b0f4d65682c148044d916
3
- size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:809731c6da6eb614081da5f258bc5912713a5c1119ce0f3170b0c3a2a014040c
3
+ size 7076