saqidr commited on
Commit
b9a3361
·
verified ·
1 Parent(s): 8238dd3

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94dd719ae3e79ab6db163a2c0423be61fef278b4f047585040f2170742065552
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361
3
  size 268290900
run-16/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b8ad3c9e3788334f5c2adb4229ae87dd8d5043fdb7947dc8f97f35fed88c2e7
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f9d6eb5310fb82fd6d0ce02178e219ef280bfac553ca1e5a08cd0dd16f2028
3
  size 268290900
run-16/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7e6ebd7722bad34820adb2b29c8deca0afdcb6d5c67073d609c2c6adcb9ec77
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61b66909a996ee50dd5b57e0c400b5d2288582050b5f926b1c289f7a221255f6
3
  size 536643898
run-16/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90dc4637e972cc69c745eebddd8a7560dca27d2318df3e23f8e145abbf236536
3
  size 1064
run-16/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcf27e76d8884c946fcfa9b2e89273479b0e58bfc88fe2cda59dc077e543cf3a
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699ed76d9af91edc99d12562d64ce4055f71cfe483dfd9ab44c7bfc8626ae66f
3
  size 5176
run-18/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1df6a2e095aa6bbb5d360e7f012cbe4a7929871fd4d4183142dd1f5f93bff5b9
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361
3
  size 268290900
run-18/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f255b5aed36ba4abd626288657ee63762aafb521c202746c931ad4d10b96a61e
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b58f2178d777caeda6638dad5deb905c5d807ad067e89066f0357dbd988962e
3
  size 536643898
run-18/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98041bd7cae455426e290a1a0ee683bd5dd30893f7451fec3a464ae8995b17e4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11e744277c61f87520794334442fae36c5f9ff6e10cb79d4bfee5176ca7eafe2
3
  size 1064
run-18/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5951612903225807,
14
- "eval_loss": 0.20783506333827972,
15
- "eval_runtime": 1.3749,
16
- "eval_samples_per_second": 2254.744,
17
- "eval_steps_per_second": 47.277,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5312509536743164,
23
- "learning_rate": 1.650593990216632e-05,
24
- "loss": 0.3341,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 2862,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 9,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -42,12 +42,12 @@
42
  "attributes": {}
43
  }
44
  },
45
- "total_flos": 189074629692144.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.40001389391237485,
50
- "num_train_epochs": 9,
51
- "temperature": 8
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6674193548387096,
14
+ "eval_loss": 0.416048139333725,
15
+ "eval_runtime": 1.3562,
16
+ "eval_samples_per_second": 2285.869,
17
+ "eval_steps_per_second": 47.93,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.9905994534492493,
23
+ "learning_rate": 1.550763701707098e-05,
24
+ "loss": 0.6477,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 2226,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 7,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
42
  "attributes": {}
43
  }
44
  },
45
+ "total_flos": 248936225670636.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.6352487868101878,
50
+ "num_train_epochs": 7,
51
+ "temperature": 2
52
  }
53
  }
run-18/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d563c3fa77ebad6147745c6eb10d98ee4dd83dbdc6caa67346241d873d2dc259
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1255ca729d01717aa414fa6756e0d6f9030540b62191575288bf9e49b890e64b
3
  size 5176
run-19/checkpoint-1000/trainer_state.json CHANGED
@@ -10,50 +10,50 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6229032258064516,
14
- "eval_loss": 0.2478133589029312,
15
- "eval_runtime": 1.3731,
16
- "eval_samples_per_second": 2257.605,
17
- "eval_steps_per_second": 47.337,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6151512861251831,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.398,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.844516129032258,
30
- "eval_loss": 0.10843393951654434,
31
- "eval_runtime": 1.3751,
32
- "eval_samples_per_second": 2254.368,
33
- "eval_steps_per_second": 47.269,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.89,
39
- "eval_loss": 0.06599755585193634,
40
- "eval_runtime": 1.3802,
41
- "eval_samples_per_second": 2246.112,
42
- "eval_steps_per_second": 47.096,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.5515937209129333,
48
- "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.1268,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
- "max_steps": 3180,
55
  "num_input_tokens_seen": 0,
56
- "num_train_epochs": 10,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
@@ -67,12 +67,12 @@
67
  "attributes": {}
68
  }
69
  },
70
- "total_flos": 319943755193412.0,
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.878459838807545,
75
- "num_train_epochs": 10,
76
  "temperature": 4
77
  }
78
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.62,
14
+ "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.3797,
16
+ "eval_samples_per_second": 2246.797,
17
+ "eval_steps_per_second": 47.11,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.6149903535842896,
23
+ "learning_rate": 1.650593990216632e-05,
24
+ "loss": 0.3991,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8422580645161291,
30
+ "eval_loss": 0.11037396639585495,
31
+ "eval_runtime": 1.3583,
32
+ "eval_samples_per_second": 2282.186,
33
+ "eval_steps_per_second": 47.852,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8883870967741936,
39
+ "eval_loss": 0.0677555724978447,
40
+ "eval_runtime": 1.3701,
41
+ "eval_samples_per_second": 2262.648,
42
+ "eval_steps_per_second": 47.443,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5547620058059692,
48
+ "learning_rate": 1.3011879804332637e-05,
49
+ "loss": 0.1288,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
+ "max_steps": 2862,
55
  "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 9,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
 
67
  "attributes": {}
68
  }
69
  },
70
+ "total_flos": 296801013878124.0,
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.8975938459167363,
75
+ "num_train_epochs": 9,
76
  "temperature": 4
77
  }
78
  }
run-19/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c39c8c8c93ca935fff2403d4310e8f1a88a48f69672fc74726e178cf1685176
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f632c364b0e8bb9c5aaa1a1d92dc11251e021768e8319ab85e38045e3c31cf61
3
  size 536643898
run-19/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6229032258064516,
14
- "eval_loss": 0.2478133589029312,
15
- "eval_runtime": 1.3731,
16
- "eval_samples_per_second": 2257.605,
17
- "eval_steps_per_second": 47.337,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6151512861251831,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.398,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 3180,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 10,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -42,12 +42,12 @@
42
  "attributes": {}
43
  }
44
  },
45
- "total_flos": 189074629692144.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.878459838807545,
50
- "num_train_epochs": 10,
51
  "temperature": 4
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.62,
14
+ "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.3797,
16
+ "eval_samples_per_second": 2246.797,
17
+ "eval_steps_per_second": 47.11,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.6149903535842896,
23
+ "learning_rate": 1.650593990216632e-05,
24
+ "loss": 0.3991,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 2862,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 9,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
42
  "attributes": {}
43
  }
44
  },
45
+ "total_flos": 165931888376856.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.8975938459167363,
50
+ "num_train_epochs": 9,
51
  "temperature": 4
52
  }
53
  }
run-19/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43ad3e5230508df5eafe3b8cc807f8ce5e9543207331b4b6a4fd8cdd4dbc0b67
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1255ca729d01717aa414fa6756e0d6f9030540b62191575288bf9e49b890e64b
3
  size 5176