Chhabi commited on
Commit
a25707d
·
verified ·
1 Parent(s): b7bba14

Training in progress, step 2500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44a8934746ca4730991ad664b1e5b257eeedbf4cdb1b0b7236a263059dae6b2e
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee889f64a3803bc7e93cca2dfec2e9dd8409e8881683d53e650bd0b988a67ebd
3
  size 268290900
run-2/checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f62bf159f5dba41ee9512cd0ba4df844f0327f305bb54850661dde29bab84062
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:828c5f883c6abb24df76c48ec692de7a5b53e001b7bf1e61b60b5c02b641ebfd
3
  size 268290900
run-2/checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26853c8849f771c5e4e2cfc5f5395a93d0db9efbd1542821073bc16e3bcc608a
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bd182f20f6b3ec49934f2c7059178d19ec372d746d42a1b0ce0be887239d97a
3
  size 536643898
run-2/checkpoint-2000/trainer_state.json CHANGED
@@ -10,84 +10,84 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6309677419354839,
14
- "eval_loss": 0.2469712644815445,
15
- "eval_runtime": 5.4589,
16
- "eval_samples_per_second": 567.879,
17
- "eval_steps_per_second": 11.907,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5992535352706909,
23
  "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.3971,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8422580645161291,
30
- "eval_loss": 0.10715335607528687,
31
- "eval_runtime": 5.8957,
32
- "eval_samples_per_second": 525.809,
33
- "eval_steps_per_second": 11.025,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8893548387096775,
39
- "eval_loss": 0.06534729152917862,
40
- "eval_runtime": 5.5645,
41
- "eval_samples_per_second": 557.106,
42
- "eval_steps_per_second": 11.681,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.5163053870201111,
48
  "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.1257,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9077419354838709,
55
- "eval_loss": 0.04786108061671257,
56
- "eval_runtime": 5.7341,
57
- "eval_samples_per_second": 540.622,
58
- "eval_steps_per_second": 11.336,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.3360358476638794,
64
  "learning_rate": 1.0566037735849058e-05,
65
- "loss": 0.0738,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.9161290322580645,
71
- "eval_loss": 0.038218673318624496,
72
- "eval_runtime": 5.7603,
73
- "eval_samples_per_second": 538.164,
74
- "eval_steps_per_second": 11.284,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9258064516129032,
80
- "eval_loss": 0.03327132761478424,
81
- "eval_runtime": 5.565,
82
- "eval_samples_per_second": 557.05,
83
- "eval_steps_per_second": 11.68,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
- "grad_norm": 0.2815706431865692,
89
  "learning_rate": 7.421383647798742e-06,
90
- "loss": 0.056,
91
  "step": 2000
92
  }
93
  ],
@@ -112,8 +112,8 @@
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.1299955797744864,
116
  "num_train_epochs": 10,
117
- "temperature": 4
118
  }
119
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.594516129032258,
14
+ "eval_loss": 0.20276567339897156,
15
+ "eval_runtime": 5.4162,
16
+ "eval_samples_per_second": 572.36,
17
+ "eval_steps_per_second": 12.001,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.535661518573761,
23
  "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3272,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8270967741935484,
30
+ "eval_loss": 0.09640489518642426,
31
+ "eval_runtime": 6.1013,
32
+ "eval_samples_per_second": 508.09,
33
+ "eval_steps_per_second": 10.653,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8841935483870967,
39
+ "eval_loss": 0.06340694427490234,
40
+ "eval_runtime": 5.5585,
41
+ "eval_samples_per_second": 557.704,
42
+ "eval_steps_per_second": 11.694,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.4495888650417328,
48
  "learning_rate": 1.371069182389937e-05,
49
+ "loss": 0.1121,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.9006451612903226,
55
+ "eval_loss": 0.048330824822187424,
56
+ "eval_runtime": 5.5377,
57
+ "eval_samples_per_second": 559.795,
58
+ "eval_steps_per_second": 11.738,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.30502042174339294,
64
  "learning_rate": 1.0566037735849058e-05,
65
+ "loss": 0.0703,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
+ "eval_accuracy": 0.9106451612903226,
71
+ "eval_loss": 0.03915739059448242,
72
+ "eval_runtime": 5.8885,
73
+ "eval_samples_per_second": 526.453,
74
+ "eval_steps_per_second": 11.039,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.9151612903225806,
80
+ "eval_loss": 0.03403216972947121,
81
+ "eval_runtime": 5.6759,
82
+ "eval_samples_per_second": 546.164,
83
+ "eval_steps_per_second": 11.452,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
+ "grad_norm": 0.2584752142429352,
89
  "learning_rate": 7.421383647798742e-06,
90
+ "loss": 0.0544,
91
  "step": 2000
92
  }
93
  ],
 
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
+ "alpha": 0.07863039618542955,
116
  "num_train_epochs": 10,
117
+ "temperature": 9
118
  }
119
  }
run-2/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3db3eb2a11611660ee273d6a93bf3ff6660f9d8c52045607a37deef8e769ebd6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7737d5811dfd235fa6fa8c193709de926af9e7ef31f18c3db6f31576650c6630
3
  size 5176
run-2/checkpoint-2500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b3fb90c12883beb4e859d4cd7efe10db9199511ae406b0eafdc230c0b584ee3
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee889f64a3803bc7e93cca2dfec2e9dd8409e8881683d53e650bd0b988a67ebd
3
  size 268290900
run-2/checkpoint-2500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a46ddc7cbc6dfd30fc1367bed39faba4e2a9db19e3cde07abe3d78aa457280c
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a1047c5067bc7bb3dedb356e3003b40204f97568d490e357f4eff48e0739def
3
  size 536643898
run-2/checkpoint-2500/trainer_state.json CHANGED
@@ -10,100 +10,100 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6309677419354839,
14
- "eval_loss": 0.2469712644815445,
15
- "eval_runtime": 5.4589,
16
- "eval_samples_per_second": 567.879,
17
- "eval_steps_per_second": 11.907,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5992535352706909,
23
  "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.3971,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8422580645161291,
30
- "eval_loss": 0.10715335607528687,
31
- "eval_runtime": 5.8957,
32
- "eval_samples_per_second": 525.809,
33
- "eval_steps_per_second": 11.025,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8893548387096775,
39
- "eval_loss": 0.06534729152917862,
40
- "eval_runtime": 5.5645,
41
- "eval_samples_per_second": 557.106,
42
- "eval_steps_per_second": 11.681,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.5163053870201111,
48
  "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.1257,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9077419354838709,
55
- "eval_loss": 0.04786108061671257,
56
- "eval_runtime": 5.7341,
57
- "eval_samples_per_second": 540.622,
58
- "eval_steps_per_second": 11.336,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.3360358476638794,
64
  "learning_rate": 1.0566037735849058e-05,
65
- "loss": 0.0738,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.9161290322580645,
71
- "eval_loss": 0.038218673318624496,
72
- "eval_runtime": 5.7603,
73
- "eval_samples_per_second": 538.164,
74
- "eval_steps_per_second": 11.284,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9258064516129032,
80
- "eval_loss": 0.03327132761478424,
81
- "eval_runtime": 5.565,
82
- "eval_samples_per_second": 557.05,
83
- "eval_steps_per_second": 11.68,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
- "grad_norm": 0.2815706431865692,
89
  "learning_rate": 7.421383647798742e-06,
90
- "loss": 0.056,
91
  "step": 2000
92
  },
93
  {
94
  "epoch": 7.0,
95
- "eval_accuracy": 0.9296774193548387,
96
- "eval_loss": 0.03050699457526207,
97
- "eval_runtime": 5.8127,
98
- "eval_samples_per_second": 533.313,
99
- "eval_steps_per_second": 11.182,
100
  "step": 2226
101
  },
102
  {
103
  "epoch": 7.861635220125786,
104
- "grad_norm": 0.3059941530227661,
105
  "learning_rate": 4.276729559748428e-06,
106
- "loss": 0.0481,
107
  "step": 2500
108
  }
109
  ],
@@ -128,8 +128,8 @@
128
  "train_batch_size": 48,
129
  "trial_name": null,
130
  "trial_params": {
131
- "alpha": 0.1299955797744864,
132
  "num_train_epochs": 10,
133
- "temperature": 4
134
  }
135
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.594516129032258,
14
+ "eval_loss": 0.20276567339897156,
15
+ "eval_runtime": 5.4162,
16
+ "eval_samples_per_second": 572.36,
17
+ "eval_steps_per_second": 12.001,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.535661518573761,
23
  "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3272,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8270967741935484,
30
+ "eval_loss": 0.09640489518642426,
31
+ "eval_runtime": 6.1013,
32
+ "eval_samples_per_second": 508.09,
33
+ "eval_steps_per_second": 10.653,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8841935483870967,
39
+ "eval_loss": 0.06340694427490234,
40
+ "eval_runtime": 5.5585,
41
+ "eval_samples_per_second": 557.704,
42
+ "eval_steps_per_second": 11.694,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.4495888650417328,
48
  "learning_rate": 1.371069182389937e-05,
49
+ "loss": 0.1121,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.9006451612903226,
55
+ "eval_loss": 0.048330824822187424,
56
+ "eval_runtime": 5.5377,
57
+ "eval_samples_per_second": 559.795,
58
+ "eval_steps_per_second": 11.738,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.30502042174339294,
64
  "learning_rate": 1.0566037735849058e-05,
65
+ "loss": 0.0703,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
+ "eval_accuracy": 0.9106451612903226,
71
+ "eval_loss": 0.03915739059448242,
72
+ "eval_runtime": 5.8885,
73
+ "eval_samples_per_second": 526.453,
74
+ "eval_steps_per_second": 11.039,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.9151612903225806,
80
+ "eval_loss": 0.03403216972947121,
81
+ "eval_runtime": 5.6759,
82
+ "eval_samples_per_second": 546.164,
83
+ "eval_steps_per_second": 11.452,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
+ "grad_norm": 0.2584752142429352,
89
  "learning_rate": 7.421383647798742e-06,
90
+ "loss": 0.0544,
91
  "step": 2000
92
  },
93
  {
94
  "epoch": 7.0,
95
+ "eval_accuracy": 0.9245161290322581,
96
+ "eval_loss": 0.03124151937663555,
97
+ "eval_runtime": 5.5585,
98
+ "eval_samples_per_second": 557.7,
99
+ "eval_steps_per_second": 11.694,
100
  "step": 2226
101
  },
102
  {
103
  "epoch": 7.861635220125786,
104
+ "grad_norm": 0.2786354124546051,
105
  "learning_rate": 4.276729559748428e-06,
106
+ "loss": 0.047,
107
  "step": 2500
108
  }
109
  ],
 
128
  "train_batch_size": 48,
129
  "trial_name": null,
130
  "trial_params": {
131
+ "alpha": 0.07863039618542955,
132
  "num_train_epochs": 10,
133
+ "temperature": 9
134
  }
135
  }
run-2/checkpoint-2500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3db3eb2a11611660ee273d6a93bf3ff6660f9d8c52045607a37deef8e769ebd6
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7737d5811dfd235fa6fa8c193709de926af9e7ef31f18c3db6f31576650c6630
3
  size 5176
runs/Aug28_01-37-39_61274092231a/events.out.tfevents.1724811208.61274092231a.346.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1cea2f8256d68d2abf2aa506badc4ce36a2dfcc35c96982669d89db0bd9976a
3
- size 15157
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a719914dbc28d496fc36a14850fbfe4e5ee6026f2efade9402f7effe379c4f15
3
+ size 16548