saqidr committed on
Commit
8238dd3
·
verified ·
1 Parent(s): b5ec692

Training in progress, step 2000

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c523e1752fda46381be0344a31afc01362450b48bbf8705a2c2bdc36cbf5d0d2
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94dd719ae3e79ab6db163a2c0423be61fef278b4f047585040f2170742065552
3
  size 268290900
run-15/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:920ec89dc47e401d4efb361d0fd82b37959f7734e045af2043ed258600a2854b
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
run-15/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:920ec89dc47e401d4efb361d0fd82b37959f7734e045af2043ed258600a2854b
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
run-15/checkpoint-2000/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
- "eval_runtime": 1.3697,
16
- "eval_samples_per_second": 2263.294,
17
- "eval_steps_per_second": 47.456,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8422580645161291,
30
  "eval_loss": 0.11037396639585495,
31
- "eval_runtime": 1.3637,
32
- "eval_samples_per_second": 2273.273,
33
- "eval_steps_per_second": 47.665,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8883870967741936,
39
  "eval_loss": 0.0677555724978447,
40
- "eval_runtime": 1.3669,
41
- "eval_samples_per_second": 2267.902,
42
- "eval_steps_per_second": 47.553,
43
  "step": 954
44
  },
45
  {
@@ -53,9 +53,9 @@
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.902258064516129,
55
  "eval_loss": 0.04977014288306236,
56
- "eval_runtime": 1.3624,
57
- "eval_samples_per_second": 2275.356,
58
- "eval_steps_per_second": 47.709,
59
  "step": 1272
60
  },
61
  {
@@ -69,18 +69,18 @@
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9167741935483871,
71
  "eval_loss": 0.039991483092308044,
72
- "eval_runtime": 1.3719,
73
- "eval_samples_per_second": 2259.57,
74
- "eval_steps_per_second": 47.378,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9232258064516129,
80
  "eval_loss": 0.034951966255903244,
81
- "eval_runtime": 1.3675,
82
- "eval_samples_per_second": 2266.904,
83
- "eval_steps_per_second": 47.532,
84
  "step": 1908
85
  },
86
  {
@@ -108,11 +108,11 @@
108
  "attributes": {}
109
  }
110
  },
111
- "total_flos": 579993747211956.0,
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.9970515290608379,
116
  "num_train_epochs": 9,
117
  "temperature": 4
118
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.2519,
16
+ "eval_samples_per_second": 2476.275,
17
+ "eval_steps_per_second": 51.922,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8422580645161291,
30
  "eval_loss": 0.11037396639585495,
31
+ "eval_runtime": 1.2525,
32
+ "eval_samples_per_second": 2475.052,
33
+ "eval_steps_per_second": 51.896,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8883870967741936,
39
  "eval_loss": 0.0677555724978447,
40
+ "eval_runtime": 1.2545,
41
+ "eval_samples_per_second": 2471.149,
42
+ "eval_steps_per_second": 51.814,
43
  "step": 954
44
  },
45
  {
 
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.902258064516129,
55
  "eval_loss": 0.04977014288306236,
56
+ "eval_runtime": 1.2559,
57
+ "eval_samples_per_second": 2468.428,
58
+ "eval_steps_per_second": 51.757,
59
  "step": 1272
60
  },
61
  {
 
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9167741935483871,
71
  "eval_loss": 0.039991483092308044,
72
+ "eval_runtime": 1.2562,
73
+ "eval_samples_per_second": 2467.767,
74
+ "eval_steps_per_second": 51.743,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
  "eval_accuracy": 0.9232258064516129,
80
  "eval_loss": 0.034951966255903244,
81
+ "eval_runtime": 1.2571,
82
+ "eval_samples_per_second": 2466.006,
83
+ "eval_steps_per_second": 51.707,
84
  "step": 1908
85
  },
86
  {
 
108
  "attributes": {}
109
  }
110
  },
111
+ "total_flos": 603995663965932.0,
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
+ "alpha": 0.9969607810126598,
116
  "num_train_epochs": 9,
117
  "temperature": 4
118
  }
run-15/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:920ec89dc47e401d4efb361d0fd82b37959f7734e045af2043ed258600a2854b
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
run-16/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.594516129032258,
14
- "eval_loss": 0.20309074223041534,
15
- "eval_runtime": 1.4199,
16
- "eval_samples_per_second": 2183.305,
17
- "eval_steps_per_second": 45.779,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5301857590675354,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.3277,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 3180,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 10,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -42,12 +42,12 @@
42
  "attributes": {}
43
  }
44
  },
45
- "total_flos": 141540333486132.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.35485465599924254,
50
- "num_train_epochs": 10,
51
- "temperature": 9
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6190322580645161,
14
+ "eval_loss": 0.25124841928482056,
15
+ "eval_runtime": 1.2599,
16
+ "eval_samples_per_second": 2460.478,
17
+ "eval_steps_per_second": 51.591,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.6181610226631165,
23
+ "learning_rate": 1.606918238993711e-05,
24
+ "loss": 0.4004,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 2544,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 8,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
42
  "attributes": {}
43
  }
44
  },
45
+ "total_flos": 189074629692144.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.06427792762358042,
50
+ "num_train_epochs": 8,
51
+ "temperature": 4
52
  }
53
  }