saqidr committed
Commit 4c0200d · verified · 1 Parent(s): 9588c9c

Training in progress, step 500

model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9bd8a2f3d3b78a273316722aa1b2f3f93bfa9a4935b4aae13a5055dd3a5dea73
+ oid sha256:b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361
  size 268290900
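
The pointer above only records the object's SHA-256 and size; a quick way to confirm that a locally downloaded model.safetensors really is the new LFS object is to hash it yourself. A minimal sketch (the local file path is an assumption, not part of this commit):

import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# Expected value taken from the "+ oid sha256:..." line above.
assert lfs_sha256("model.safetensors") == (
    "b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361"
)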
run-0/checkpoint-1000/config.json CHANGED
@@ -326,6 +326,6 @@
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
- "transformers_version": "4.37.2",
+ "transformers_version": "4.41.1",
  "vocab_size": 30522
  }
run-0/checkpoint-1000/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
  {
  "version": "1.0",
- "truncation": {
- "direction": "Right",
- "max_length": 512,
- "strategy": "LongestFirst",
- "stride": 0
- },
+ "truncation": null,
  "padding": null,
  "added_tokens": [
  {
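
With the newer tokenizers serialization, this checkpoint's tokenizer.json no longer pins truncation ("truncation": null). If downstream code relied on the old 512-token setting, the same behaviour can be requested at encode time instead; a minimal sketch, assuming the checkpoint is loaded from its local run-0 directory:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("run-0/checkpoint-1000")
encoded = tokenizer(
    "a long input sequence ...",
    truncation=True,   # longest-first truncation, as in the removed block
    max_length=512,    # matches the old "max_length": 512
)
print(len(encoded["input_ids"]))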
run-0/checkpoint-1000/trainer_state.json CHANGED
@@ -10,55 +10,69 @@
  "log_history": [
  {
  "epoch": 1.0,
- "eval_accuracy": 0.5958064516129032,
- "eval_loss": 0.1949465274810791,
- "eval_runtime": 1.357,
- "eval_samples_per_second": 2284.501,
- "eval_steps_per_second": 47.901,
+ "eval_accuracy": 0.6674193548387096,
+ "eval_loss": 0.416048139333725,
+ "eval_runtime": 1.3187,
+ "eval_samples_per_second": 2350.748,
+ "eval_steps_per_second": 49.29,
  "step": 318
  },
  {
- "epoch": 1.57,
- "learning_rate": 1.606918238993711e-05,
- "loss": 0.3113,
+ "epoch": 1.5723270440251573,
+ "grad_norm": 0.9905994534492493,
+ "learning_rate": 1.550763701707098e-05,
+ "loss": 0.6477,
  "step": 500
  },
  {
  "epoch": 2.0,
- "eval_accuracy": 0.8164516129032258,
- "eval_loss": 0.09700000286102295,
- "eval_runtime": 1.3602,
- "eval_samples_per_second": 2279.158,
- "eval_steps_per_second": 47.789,
+ "eval_accuracy": 0.8332258064516129,
+ "eval_loss": 0.15088918805122375,
+ "eval_runtime": 1.3272,
+ "eval_samples_per_second": 2335.773,
+ "eval_steps_per_second": 48.976,
  "step": 636
  },
  {
  "epoch": 3.0,
- "eval_accuracy": 0.8754838709677419,
- "eval_loss": 0.0647290050983429,
- "eval_runtime": 1.3748,
- "eval_samples_per_second": 2254.826,
- "eval_steps_per_second": 47.279,
+ "eval_accuracy": 0.886774193548387,
+ "eval_loss": 0.07951628416776657,
+ "eval_runtime": 1.3591,
+ "eval_samples_per_second": 2280.965,
+ "eval_steps_per_second": 47.827,
  "step": 954
  },
  {
- "epoch": 3.14,
- "learning_rate": 1.2138364779874214e-05,
- "loss": 0.1105,
+ "epoch": 3.1446540880503147,
+ "grad_norm": 0.7344270348548889,
+ "learning_rate": 1.101527403414196e-05,
+ "loss": 0.1749,
  "step": 1000
  }
  ],
  "logging_steps": 500,
- "max_steps": 2544,
+ "max_steps": 2226,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 8,
+ "num_train_epochs": 7,
  "save_steps": 500,
- "total_flos": 259991364709020.0,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 260941334653608.0,
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.05314446157998587,
- "num_train_epochs": 8,
- "temperature": 17
+ "alpha": 0.5999229797501479,
+ "num_train_epochs": 7,
+ "temperature": 2
  }
  }
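
The trial_params block records alpha and temperature, the usual knowledge-distillation hyperparameters for a hyperparameter-search trial. The training script itself is not part of this commit, so the following is only a sketch of how such values are commonly combined into a loss:

import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      alpha=0.5999229797501479, temperature=2.0):
    # Hard-label term: ordinary cross-entropy on the student's predictions.
    hard_loss = F.cross_entropy(student_logits, labels)
    # Soft-label term: KL divergence against the temperature-scaled teacher,
    # rescaled by temperature**2 as in the classic distillation formulation.
    soft_loss = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * temperature ** 2
    return alpha * hard_loss + (1.0 - alpha) * soft_loss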
run-0/checkpoint-1500/config.json CHANGED
@@ -326,6 +326,6 @@
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
- "transformers_version": "4.37.2",
+ "transformers_version": "4.41.1",
  "vocab_size": 30522
  }
run-0/checkpoint-1500/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
  {
  "version": "1.0",
- "truncation": {
- "direction": "Right",
- "max_length": 512,
- "strategy": "LongestFirst",
- "stride": 0
- },
+ "truncation": null,
  "padding": null,
  "added_tokens": [
  {
run-0/checkpoint-1500/trainer_state.json CHANGED
@@ -10,70 +10,85 @@
  "log_history": [
  {
  "epoch": 1.0,
- "eval_accuracy": 0.5958064516129032,
- "eval_loss": 0.1949465274810791,
- "eval_runtime": 1.357,
- "eval_samples_per_second": 2284.501,
- "eval_steps_per_second": 47.901,
+ "eval_accuracy": 0.6674193548387096,
+ "eval_loss": 0.416048139333725,
+ "eval_runtime": 1.3187,
+ "eval_samples_per_second": 2350.748,
+ "eval_steps_per_second": 49.29,
  "step": 318
  },
  {
- "epoch": 1.57,
- "learning_rate": 1.606918238993711e-05,
- "loss": 0.3113,
+ "epoch": 1.5723270440251573,
+ "grad_norm": 0.9905994534492493,
+ "learning_rate": 1.550763701707098e-05,
+ "loss": 0.6477,
  "step": 500
  },
  {
  "epoch": 2.0,
- "eval_accuracy": 0.8164516129032258,
- "eval_loss": 0.09700000286102295,
- "eval_runtime": 1.3602,
- "eval_samples_per_second": 2279.158,
- "eval_steps_per_second": 47.789,
+ "eval_accuracy": 0.8332258064516129,
+ "eval_loss": 0.15088918805122375,
+ "eval_runtime": 1.3272,
+ "eval_samples_per_second": 2335.773,
+ "eval_steps_per_second": 48.976,
  "step": 636
  },
  {
  "epoch": 3.0,
- "eval_accuracy": 0.8754838709677419,
- "eval_loss": 0.0647290050983429,
- "eval_runtime": 1.3748,
- "eval_samples_per_second": 2254.826,
- "eval_steps_per_second": 47.279,
+ "eval_accuracy": 0.886774193548387,
+ "eval_loss": 0.07951628416776657,
+ "eval_runtime": 1.3591,
+ "eval_samples_per_second": 2280.965,
+ "eval_steps_per_second": 47.827,
  "step": 954
  },
  {
- "epoch": 3.14,
- "learning_rate": 1.2138364779874214e-05,
- "loss": 0.1105,
+ "epoch": 3.1446540880503147,
+ "grad_norm": 0.7344270348548889,
+ "learning_rate": 1.101527403414196e-05,
+ "loss": 0.1749,
  "step": 1000
  },
  {
  "epoch": 4.0,
- "eval_accuracy": 0.8951612903225806,
- "eval_loss": 0.05031874030828476,
- "eval_runtime": 1.3686,
- "eval_samples_per_second": 2265.046,
- "eval_steps_per_second": 47.493,
+ "eval_accuracy": 0.9090322580645162,
+ "eval_loss": 0.05769029259681702,
+ "eval_runtime": 1.3435,
+ "eval_samples_per_second": 2307.35,
+ "eval_steps_per_second": 48.38,
  "step": 1272
  },
  {
- "epoch": 4.72,
- "learning_rate": 8.207547169811321e-06,
- "loss": 0.0722,
+ "epoch": 4.716981132075472,
+ "grad_norm": 0.4701627790927887,
+ "learning_rate": 6.522911051212939e-06,
+ "loss": 0.0902,
  "step": 1500
  }
  ],
  "logging_steps": 500,
- "max_steps": 2544,
+ "max_steps": 2226,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 8,
+ "num_train_epochs": 7,
  "save_steps": 500,
- "total_flos": 390310534917408.0,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 391368939443328.0,
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.05314446157998587,
- "num_train_epochs": 8,
- "temperature": 17
+ "alpha": 0.5999229797501479,
+ "num_train_epochs": 7,
+ "temperature": 2
  }
  }
run-0/checkpoint-500/config.json CHANGED
@@ -326,6 +326,6 @@
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
- "transformers_version": "4.37.2",
+ "transformers_version": "4.41.1",
  "vocab_size": 30522
  }
run-0/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:02892266441aea1d8c6015a1aa8f42f52896dc6a7d4c94ee4456d61840826de9
+ oid sha256:b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361
  size 268290900
run-0/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:88991dd767245c671b09ac05356c4a34c293e243214a97ee9fcf460f8ad6b548
+ oid sha256:8b58f2178d777caeda6638dad5deb905c5d807ad067e89066f0357dbd988962e
  size 536643898
run-0/checkpoint-500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f8174c1d6ac2627de77161655e4e20ef37d9f2235e2e7c4adc0da0d4e0e14b6d
+ oid sha256:b9fb2a0dfb1b0ccef3590fe01e9bd16b6db86f247cc9c7e77290c217a53bac20
  size 14244
run-0/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:90dc4637e972cc69c745eebddd8a7560dca27d2318df3e23f8e145abbf236536
+ oid sha256:11e744277c61f87520794334442fae36c5f9ff6e10cb79d4bfee5176ca7eafe2
  size 1064
run-0/checkpoint-500/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
  {
  "version": "1.0",
- "truncation": {
- "direction": "Right",
- "max_length": 512,
- "strategy": "LongestFirst",
- "stride": 0
- },
+ "truncation": null,
  "padding": null,
  "added_tokens": [
  {
run-0/checkpoint-500/trainer_state.json CHANGED
@@ -10,31 +10,44 @@
  "log_history": [
  {
  "epoch": 1.0,
- "eval_accuracy": 0.5958064516129032,
- "eval_loss": 0.1949465274810791,
- "eval_runtime": 1.357,
- "eval_samples_per_second": 2284.501,
- "eval_steps_per_second": 47.901,
+ "eval_accuracy": 0.6674193548387096,
+ "eval_loss": 0.416048139333725,
+ "eval_runtime": 1.3187,
+ "eval_samples_per_second": 2350.748,
+ "eval_steps_per_second": 49.29,
  "step": 318
  },
  {
- "epoch": 1.57,
- "learning_rate": 1.606918238993711e-05,
- "loss": 0.3113,
+ "epoch": 1.5723270440251573,
+ "grad_norm": 0.9905994534492493,
+ "learning_rate": 1.550763701707098e-05,
+ "loss": 0.6477,
  "step": 500
  }
  ],
  "logging_steps": 500,
- "max_steps": 2544,
+ "max_steps": 2226,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 8,
+ "num_train_epochs": 7,
  "save_steps": 500,
- "total_flos": 129219778448376.0,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 130072209152340.0,
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.05314446157998587,
- "num_train_epochs": 8,
- "temperature": 17
+ "alpha": 0.5999229797501479,
+ "num_train_epochs": 7,
+ "temperature": 2
  }
  }
run-0/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:128e3e531e1f78b4508e424d6b0320b2ccd0b6f2b6fb1f09e3f886f07d5e86ea
- size 4728
+ oid sha256:3995b387f2d0291be709cdbf61f96ec0665c9f1f556146e986cb9b22d69b84bd
+ size 5176
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:66f849306af77ca3cfc7f8025881a2385ebcd0bc81cdd8e3ab072446806891ae
+ oid sha256:3995b387f2d0291be709cdbf61f96ec0665c9f1f556146e986cb9b22d69b84bd
  size 5176