chansung committed on
Commit
fcda1e0
1 Parent(s): bde5998

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -5
  2. all_results.json +4 -9
  3. train_results.json +4 -4
  4. trainer_state.json +55 -55
README.md CHANGED
@@ -1,11 +1,10 @@
1
  ---
2
  base_model: meta-llama/Meta-Llama-3-8B
3
  datasets:
4
- - llama-duo/synth_classification_dataset_dedup
5
  library_name: peft
6
  license: llama3
7
  tags:
8
- - alignment-handbook
9
  - trl
10
  - sft
11
  - generated_from_trainer
@@ -19,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # llama3.1-8b-gpt4o_100k_classification-k
21
 
22
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the llama-duo/synth_classification_dataset_dedup dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.6323
25
 
26
  ## Model description
27
 
@@ -58,7 +57,7 @@ The following hyperparameters were used during training:
58
 
59
  | Training Loss | Epoch | Step | Validation Loss |
60
  |:-------------:|:------:|:----:|:---------------:|
61
- | 1.0751 | 0.9958 | 118 | 1.6323 |
62
 
63
 
64
  ### Framework versions
 
1
  ---
2
  base_model: meta-llama/Meta-Llama-3-8B
3
  datasets:
4
+ - generator
5
  library_name: peft
6
  license: llama3
7
  tags:
 
8
  - trl
9
  - sft
10
  - generated_from_trainer
 
18
 
19
  # llama3.1-8b-gpt4o_100k_classification-k
20
 
21
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the generator dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 1.6382
24
 
25
  ## Model description
26
 
 
57
 
58
  | Training Loss | Epoch | Step | Validation Loss |
59
  |:-------------:|:------:|:----:|:---------------:|
60
+ | 1.075 | 0.9958 | 118 | 1.6382 |
61
 
62
 
63
  ### Framework versions
all_results.json CHANGED
@@ -1,14 +1,9 @@
1
  {
2
  "epoch": 0.9957805907172996,
3
- "eval_loss": 1.6322884559631348,
4
- "eval_runtime": 1.1485,
5
- "eval_samples": 16,
6
- "eval_samples_per_second": 1.741,
7
- "eval_steps_per_second": 0.871,
8
  "total_flos": 3.48383355932246e+17,
9
- "train_loss": 1.2928365105289523,
10
- "train_runtime": 1374.4457,
11
  "train_samples": 92634,
12
- "train_samples_per_second": 5.507,
13
- "train_steps_per_second": 0.086
14
  }
 
1
  {
2
  "epoch": 0.9957805907172996,
 
 
 
 
 
3
  "total_flos": 3.48383355932246e+17,
4
+ "train_loss": 1.291591884726185,
5
+ "train_runtime": 1283.2771,
6
  "train_samples": 92634,
7
+ "train_samples_per_second": 5.898,
8
+ "train_steps_per_second": 0.092
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9957805907172996,
3
  "total_flos": 3.48383355932246e+17,
4
- "train_loss": 1.2928365105289523,
5
- "train_runtime": 1374.4457,
6
  "train_samples": 92634,
7
- "train_samples_per_second": 5.507,
8
- "train_steps_per_second": 0.086
9
  }
 
1
  {
2
  "epoch": 0.9957805907172996,
3
  "total_flos": 3.48383355932246e+17,
4
+ "train_loss": 1.291591884726185,
5
+ "train_runtime": 1283.2771,
6
  "train_samples": 92634,
7
+ "train_samples_per_second": 5.898,
8
+ "train_steps_per_second": 0.092
9
  }
trainer_state.json CHANGED
@@ -10,188 +10,188 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.008438818565400843,
13
- "grad_norm": 1.0222958326339722,
14
  "learning_rate": 2.4999999999999998e-05,
15
  "loss": 2.86,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.04219409282700422,
20
- "grad_norm": 1.6936215162277222,
21
  "learning_rate": 0.000125,
22
- "loss": 2.8152,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.08438818565400844,
27
- "grad_norm": 2.3276407718658447,
28
  "learning_rate": 0.00025,
29
- "loss": 2.5324,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.12658227848101267,
34
- "grad_norm": 6.024274826049805,
35
  "learning_rate": 0.0002994074763922825,
36
- "loss": 1.692,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.16877637130801687,
41
- "grad_norm": 0.993145763874054,
42
  "learning_rate": 0.0002958034371120616,
43
- "loss": 1.4859,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.2109704641350211,
48
- "grad_norm": 0.7716729640960693,
49
  "learning_rate": 0.000289003441114775,
50
- "loss": 1.326,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.25316455696202533,
55
- "grad_norm": 0.5372955203056335,
56
  "learning_rate": 0.0002791565417651033,
57
- "loss": 1.2597,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.29535864978902954,
62
- "grad_norm": 0.5266535878181458,
63
  "learning_rate": 0.00026647857940770634,
64
- "loss": 1.1995,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.33755274261603374,
69
- "grad_norm": 0.3248014748096466,
70
  "learning_rate": 0.0002512474502277316,
71
- "loss": 1.1785,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 0.379746835443038,
76
- "grad_norm": 0.22964197397232056,
77
  "learning_rate": 0.00023379701487054785,
78
- "loss": 1.1528,
79
  "step": 45
80
  },
81
  {
82
  "epoch": 0.4219409282700422,
83
- "grad_norm": 0.2224261313676834,
84
  "learning_rate": 0.00021450978034147806,
85
- "loss": 1.1343,
86
  "step": 50
87
  },
88
  {
89
  "epoch": 0.4641350210970464,
90
- "grad_norm": 0.22749578952789307,
91
  "learning_rate": 0.00019380851559554636,
92
- "loss": 1.1165,
93
  "step": 55
94
  },
95
  {
96
  "epoch": 0.5063291139240507,
97
- "grad_norm": 0.2278250902891159,
98
  "learning_rate": 0.00017214698460037218,
99
- "loss": 1.1106,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 0.5485232067510548,
104
- "grad_norm": 0.22940242290496826,
105
  "learning_rate": 0.00015,
106
- "loss": 1.0946,
107
  "step": 65
108
  },
109
  {
110
  "epoch": 0.5907172995780591,
111
- "grad_norm": 0.20619849860668182,
112
  "learning_rate": 0.00012785301539962782,
113
- "loss": 1.0952,
114
  "step": 70
115
  },
116
  {
117
  "epoch": 0.6329113924050633,
118
- "grad_norm": 0.19738492369651794,
119
  "learning_rate": 0.00010619148440445364,
120
- "loss": 1.0891,
121
  "step": 75
122
  },
123
  {
124
  "epoch": 0.6751054852320675,
125
- "grad_norm": 0.2077159434556961,
126
  "learning_rate": 8.549021965852197e-05,
127
- "loss": 1.0828,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 0.7172995780590717,
132
- "grad_norm": 0.1939452439546585,
133
  "learning_rate": 6.620298512945214e-05,
134
- "loss": 1.0723,
135
  "step": 85
136
  },
137
  {
138
  "epoch": 0.759493670886076,
139
- "grad_norm": 0.20105819404125214,
140
  "learning_rate": 4.8752549772268444e-05,
141
- "loss": 1.0617,
142
  "step": 90
143
  },
144
  {
145
  "epoch": 0.8016877637130801,
146
- "grad_norm": 0.2106432467699051,
147
  "learning_rate": 3.352142059229365e-05,
148
- "loss": 1.0647,
149
  "step": 95
150
  },
151
  {
152
  "epoch": 0.8438818565400844,
153
- "grad_norm": 0.19021108746528625,
154
  "learning_rate": 2.0843458234896666e-05,
155
- "loss": 1.0778,
156
  "step": 100
157
  },
158
  {
159
  "epoch": 0.8860759493670886,
160
- "grad_norm": 0.23987625539302826,
161
  "learning_rate": 1.0996558885224993e-05,
162
- "loss": 1.0761,
163
  "step": 105
164
  },
165
  {
166
  "epoch": 0.9282700421940928,
167
- "grad_norm": 0.1889180988073349,
168
  "learning_rate": 4.1965628879383875e-06,
169
- "loss": 1.0657,
170
  "step": 110
171
  },
172
  {
173
  "epoch": 0.9704641350210971,
174
- "grad_norm": 0.19509291648864746,
175
  "learning_rate": 5.925236077174655e-07,
176
- "loss": 1.0751,
177
  "step": 115
178
  },
179
  {
180
  "epoch": 0.9957805907172996,
181
- "eval_loss": 1.6322884559631348,
182
- "eval_runtime": 1.1519,
183
- "eval_samples_per_second": 1.736,
184
- "eval_steps_per_second": 0.868,
185
  "step": 118
186
  },
187
  {
188
  "epoch": 0.9957805907172996,
189
  "step": 118,
190
  "total_flos": 3.48383355932246e+17,
191
- "train_loss": 1.2928365105289523,
192
- "train_runtime": 1374.4457,
193
- "train_samples_per_second": 5.507,
194
- "train_steps_per_second": 0.086
195
  }
196
  ],
197
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.008438818565400843,
13
+ "grad_norm": 1.022000789642334,
14
  "learning_rate": 2.4999999999999998e-05,
15
  "loss": 2.86,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.04219409282700422,
20
+ "grad_norm": 1.4761569499969482,
21
  "learning_rate": 0.000125,
22
+ "loss": 2.8149,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.08438818565400844,
27
+ "grad_norm": 2.290438413619995,
28
  "learning_rate": 0.00025,
29
+ "loss": 2.5283,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.12658227848101267,
34
+ "grad_norm": 5.928793430328369,
35
  "learning_rate": 0.0002994074763922825,
36
+ "loss": 1.6863,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.16877637130801687,
41
+ "grad_norm": 0.9887209534645081,
42
  "learning_rate": 0.0002958034371120616,
43
+ "loss": 1.4771,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.2109704641350211,
48
+ "grad_norm": 0.7580465078353882,
49
  "learning_rate": 0.000289003441114775,
50
+ "loss": 1.3231,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.25316455696202533,
55
+ "grad_norm": 0.5388275384902954,
56
  "learning_rate": 0.0002791565417651033,
57
+ "loss": 1.2578,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.29535864978902954,
62
+ "grad_norm": 0.5278171896934509,
63
  "learning_rate": 0.00026647857940770634,
64
+ "loss": 1.1985,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.33755274261603374,
69
+ "grad_norm": 0.31164848804473877,
70
  "learning_rate": 0.0002512474502277316,
71
+ "loss": 1.178,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 0.379746835443038,
76
+ "grad_norm": 0.22269943356513977,
77
  "learning_rate": 0.00023379701487054785,
78
+ "loss": 1.1526,
79
  "step": 45
80
  },
81
  {
82
  "epoch": 0.4219409282700422,
83
+ "grad_norm": 0.220732182264328,
84
  "learning_rate": 0.00021450978034147806,
85
+ "loss": 1.134,
86
  "step": 50
87
  },
88
  {
89
  "epoch": 0.4641350210970464,
90
+ "grad_norm": 0.2316388487815857,
91
  "learning_rate": 0.00019380851559554636,
92
+ "loss": 1.1167,
93
  "step": 55
94
  },
95
  {
96
  "epoch": 0.5063291139240507,
97
+ "grad_norm": 0.2175298035144806,
98
  "learning_rate": 0.00017214698460037218,
99
+ "loss": 1.1102,
100
  "step": 60
101
  },
102
  {
103
  "epoch": 0.5485232067510548,
104
+ "grad_norm": 0.21935291588306427,
105
  "learning_rate": 0.00015,
106
+ "loss": 1.0942,
107
  "step": 65
108
  },
109
  {
110
  "epoch": 0.5907172995780591,
111
+ "grad_norm": 0.2013280689716339,
112
  "learning_rate": 0.00012785301539962782,
113
+ "loss": 1.0947,
114
  "step": 70
115
  },
116
  {
117
  "epoch": 0.6329113924050633,
118
+ "grad_norm": 0.18674449622631073,
119
  "learning_rate": 0.00010619148440445364,
120
+ "loss": 1.0888,
121
  "step": 75
122
  },
123
  {
124
  "epoch": 0.6751054852320675,
125
+ "grad_norm": 0.21201685070991516,
126
  "learning_rate": 8.549021965852197e-05,
127
+ "loss": 1.0819,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 0.7172995780590717,
132
+ "grad_norm": 0.19578513503074646,
133
  "learning_rate": 6.620298512945214e-05,
134
+ "loss": 1.0722,
135
  "step": 85
136
  },
137
  {
138
  "epoch": 0.759493670886076,
139
+ "grad_norm": 0.19662944972515106,
140
  "learning_rate": 4.8752549772268444e-05,
141
+ "loss": 1.0615,
142
  "step": 90
143
  },
144
  {
145
  "epoch": 0.8016877637130801,
146
+ "grad_norm": 0.20801547169685364,
147
  "learning_rate": 3.352142059229365e-05,
148
+ "loss": 1.0642,
149
  "step": 95
150
  },
151
  {
152
  "epoch": 0.8438818565400844,
153
+ "grad_norm": 0.18992270529270172,
154
  "learning_rate": 2.0843458234896666e-05,
155
+ "loss": 1.0775,
156
  "step": 100
157
  },
158
  {
159
  "epoch": 0.8860759493670886,
160
+ "grad_norm": 0.21922025084495544,
161
  "learning_rate": 1.0996558885224993e-05,
162
+ "loss": 1.0757,
163
  "step": 105
164
  },
165
  {
166
  "epoch": 0.9282700421940928,
167
+ "grad_norm": 0.18345749378204346,
168
  "learning_rate": 4.1965628879383875e-06,
169
+ "loss": 1.0654,
170
  "step": 110
171
  },
172
  {
173
  "epoch": 0.9704641350210971,
174
+ "grad_norm": 0.19231846928596497,
175
  "learning_rate": 5.925236077174655e-07,
176
+ "loss": 1.075,
177
  "step": 115
178
  },
179
  {
180
  "epoch": 0.9957805907172996,
181
+ "eval_loss": 1.6381601095199585,
182
+ "eval_runtime": 1.1564,
183
+ "eval_samples_per_second": 1.729,
184
+ "eval_steps_per_second": 0.865,
185
  "step": 118
186
  },
187
  {
188
  "epoch": 0.9957805907172996,
189
  "step": 118,
190
  "total_flos": 3.48383355932246e+17,
191
+ "train_loss": 1.291591884726185,
192
+ "train_runtime": 1283.2771,
193
+ "train_samples_per_second": 5.898,
194
+ "train_steps_per_second": 0.092
195
  }
196
  ],
197
  "logging_steps": 5,