saqidr commited on
Commit
486da0c
·
verified ·
1 Parent(s): 952f487

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:156ea8881e640a983d137bf9a2f1b9c9831f3dfa43621563d93e76d3f69fdeaf
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd3bf3d49936dfd8baa9582c8eaba7717d92d7d1f8d94038d4151e4c2d4f278
3
  size 268290900
run-0/checkpoint-2500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c5480e86a14c3a5a2ed23ef1a3c50cc4071345c9709d7ff4fa42a75714fb2d4
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b19c0ce2f5e0f7b94bb48ab83ea28dc1398fadf382621a268cfed8eaba49b3c
3
  size 268290900
run-0/checkpoint-2500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6267e7147c56b42a2babf11bb31d0773d90f6a6cf848cd12b413e7b310c7e524
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b906831cff1b0088ec50f8b463e517cb334962b03119b28ae671410ba27e2b
3
  size 536643898
run-0/checkpoint-2500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d116f7be47ce88249a782ce7aafaf5b673a05bfa0ebdf9f14ab4c327c94c6b04
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b997ea9328158f28ff50e49c476931e703b821360bc8325e0c4d100e032c865
3
  size 1064
run-0/checkpoint-2500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1a32d8cd0db56d157e9d132d07845e4b644e6f4a2e7846f84de34c5644e4295
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:128e3e531e1f78b4508e424d6b0320b2ccd0b6f2b6fb1f09e3f886f07d5e86ea
3
  size 4728
run-1/checkpoint-1000/trainer_state.json CHANGED
@@ -10,55 +10,55 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6412903225806451,
14
- "eval_loss": 0.230123370885849,
15
- "eval_runtime": 1.3595,
16
- "eval_samples_per_second": 2280.291,
17
- "eval_steps_per_second": 47.813,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
- "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3693,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8358064516129032,
29
- "eval_loss": 0.10448037087917328,
30
- "eval_runtime": 1.3653,
31
- "eval_samples_per_second": 2270.569,
32
- "eval_steps_per_second": 47.609,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8851612903225806,
38
- "eval_loss": 0.06413500756025314,
39
- "eval_runtime": 1.3667,
40
- "eval_samples_per_second": 2268.292,
41
- "eval_steps_per_second": 47.561,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
- "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1204,
48
  "step": 1000
49
  }
50
  ],
51
  "logging_steps": 500,
52
- "max_steps": 3180,
53
  "num_input_tokens_seen": 0,
54
- "num_train_epochs": 10,
55
  "save_steps": 500,
56
  "total_flos": 259991364709020.0,
57
  "train_batch_size": 48,
58
  "trial_name": null,
59
  "trial_params": {
60
- "alpha": 0.7207200745295966,
61
- "num_train_epochs": 10,
62
- "temperature": 5
63
  }
64
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5896774193548387,
14
+ "eval_loss": 0.21174675226211548,
15
+ "eval_runtime": 1.3562,
16
+ "eval_samples_per_second": 2285.793,
17
+ "eval_steps_per_second": 47.928,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
+ "learning_rate": 1.371069182389937e-05,
23
+ "loss": 0.3315,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8035483870967742,
29
+ "eval_loss": 0.10993191599845886,
30
+ "eval_runtime": 1.3614,
31
+ "eval_samples_per_second": 2277.03,
32
+ "eval_steps_per_second": 47.744,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8616129032258064,
38
+ "eval_loss": 0.07778050750494003,
39
+ "eval_runtime": 1.3633,
40
+ "eval_samples_per_second": 2273.815,
41
+ "eval_steps_per_second": 47.677,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
+ "learning_rate": 7.421383647798742e-06,
47
+ "loss": 0.1256,
48
  "step": 1000
49
  }
50
  ],
51
  "logging_steps": 500,
52
+ "max_steps": 1590,
53
  "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 5,
55
  "save_steps": 500,
56
  "total_flos": 259991364709020.0,
57
  "train_batch_size": 48,
58
  "trial_name": null,
59
  "trial_params": {
60
+ "alpha": 0.8444910353259852,
61
+ "num_train_epochs": 5,
62
+ "temperature": 10
63
  }
64
  }
run-1/checkpoint-1500/trainer_state.json CHANGED
@@ -10,70 +10,70 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6412903225806451,
14
- "eval_loss": 0.230123370885849,
15
- "eval_runtime": 1.3595,
16
- "eval_samples_per_second": 2280.291,
17
- "eval_steps_per_second": 47.813,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
- "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3693,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8358064516129032,
29
- "eval_loss": 0.10448037087917328,
30
- "eval_runtime": 1.3653,
31
- "eval_samples_per_second": 2270.569,
32
- "eval_steps_per_second": 47.609,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8851612903225806,
38
- "eval_loss": 0.06413500756025314,
39
- "eval_runtime": 1.3667,
40
- "eval_samples_per_second": 2268.292,
41
- "eval_steps_per_second": 47.561,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
- "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1204,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.9041935483870968,
53
- "eval_loss": 0.04693836718797684,
54
- "eval_runtime": 1.3284,
55
- "eval_samples_per_second": 2333.641,
56
- "eval_steps_per_second": 48.931,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
- "learning_rate": 1.0566037735849058e-05,
62
- "loss": 0.0726,
63
  "step": 1500
64
  }
65
  ],
66
  "logging_steps": 500,
67
- "max_steps": 3180,
68
  "num_input_tokens_seen": 0,
69
- "num_train_epochs": 10,
70
  "save_steps": 500,
71
  "total_flos": 390310534917408.0,
72
  "train_batch_size": 48,
73
  "trial_name": null,
74
  "trial_params": {
75
- "alpha": 0.7207200745295966,
76
- "num_train_epochs": 10,
77
- "temperature": 5
78
  }
79
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5896774193548387,
14
+ "eval_loss": 0.21174675226211548,
15
+ "eval_runtime": 1.3562,
16
+ "eval_samples_per_second": 2285.793,
17
+ "eval_steps_per_second": 47.928,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
+ "learning_rate": 1.371069182389937e-05,
23
+ "loss": 0.3315,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.8035483870967742,
29
+ "eval_loss": 0.10993191599845886,
30
+ "eval_runtime": 1.3614,
31
+ "eval_samples_per_second": 2277.03,
32
+ "eval_steps_per_second": 47.744,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
+ "eval_accuracy": 0.8616129032258064,
38
+ "eval_loss": 0.07778050750494003,
39
+ "eval_runtime": 1.3633,
40
+ "eval_samples_per_second": 2273.815,
41
+ "eval_steps_per_second": 47.677,
42
  "step": 954
43
  },
44
  {
45
  "epoch": 3.14,
46
+ "learning_rate": 7.421383647798742e-06,
47
+ "loss": 0.1256,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.8783870967741936,
53
+ "eval_loss": 0.06527664512395859,
54
+ "eval_runtime": 1.3774,
55
+ "eval_samples_per_second": 2250.621,
56
+ "eval_steps_per_second": 47.19,
57
  "step": 1272
58
  },
59
  {
60
  "epoch": 4.72,
61
+ "learning_rate": 1.1320754716981133e-06,
62
+ "loss": 0.0903,
63
  "step": 1500
64
  }
65
  ],
66
  "logging_steps": 500,
67
+ "max_steps": 1590,
68
  "num_input_tokens_seen": 0,
69
+ "num_train_epochs": 5,
70
  "save_steps": 500,
71
  "total_flos": 390310534917408.0,
72
  "train_batch_size": 48,
73
  "trial_name": null,
74
  "trial_params": {
75
+ "alpha": 0.8444910353259852,
76
+ "num_train_epochs": 5,
77
+ "temperature": 10
78
  }
79
  }
run-1/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecc1f11e1ef54a1b58f7693d099fea3bedfbfea3641ac16e05bffb49d95fa7f2
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd3bf3d49936dfd8baa9582c8eaba7717d92d7d1f8d94038d4151e4c2d4f278
3
  size 268290900
run-1/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d266ed532c816f7e3c324cd8af36c1d30b9a7dc7a458634269df939061528b01
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a7b2cb6089e0574b3e01b3ab3738f4f73b1a92359ec8127a9ce373bb97a274
3
  size 536643898
run-1/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1264523e958cf7990dc5f42d876cc12129475c4603804cf66868aaf25c2c24
3
  size 1064
run-1/checkpoint-500/trainer_state.json CHANGED
@@ -10,31 +10,31 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6412903225806451,
14
- "eval_loss": 0.230123370885849,
15
- "eval_runtime": 1.3595,
16
- "eval_samples_per_second": 2280.291,
17
- "eval_steps_per_second": 47.813,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
- "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3693,
24
  "step": 500
25
  }
26
  ],
27
  "logging_steps": 500,
28
- "max_steps": 3180,
29
  "num_input_tokens_seen": 0,
30
- "num_train_epochs": 10,
31
  "save_steps": 500,
32
  "total_flos": 129219778448376.0,
33
  "train_batch_size": 48,
34
  "trial_name": null,
35
  "trial_params": {
36
- "alpha": 0.7207200745295966,
37
- "num_train_epochs": 10,
38
- "temperature": 5
39
  }
40
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5896774193548387,
14
+ "eval_loss": 0.21174675226211548,
15
+ "eval_runtime": 1.3562,
16
+ "eval_samples_per_second": 2285.793,
17
+ "eval_steps_per_second": 47.928,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.57,
22
+ "learning_rate": 1.371069182389937e-05,
23
+ "loss": 0.3315,
24
  "step": 500
25
  }
26
  ],
27
  "logging_steps": 500,
28
+ "max_steps": 1590,
29
  "num_input_tokens_seen": 0,
30
+ "num_train_epochs": 5,
31
  "save_steps": 500,
32
  "total_flos": 129219778448376.0,
33
  "train_batch_size": 48,
34
  "trial_name": null,
35
  "trial_params": {
36
+ "alpha": 0.8444910353259852,
37
+ "num_train_epochs": 5,
38
+ "temperature": 10
39
  }
40
  }
run-1/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9edb169c95045dfb3d8473928c594ea0196c02cbd8544dc5bc903ed7216c71b
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440b33c1d4af396a1f83c40460d843946c313509afa8ac95fbf678728a7e05c7
3
  size 4728
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:128e3e531e1f78b4508e424d6b0320b2ccd0b6f2b6fb1f09e3f886f07d5e86ea
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440b33c1d4af396a1f83c40460d843946c313509afa8ac95fbf678728a7e05c7
3
  size 4728