avsolatorio commited on
Commit
cc32733
·
verified ·
1 Parent(s): f4f190a

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -8,184 +8,184 @@
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "Water Supply",
12
- "1": "Sexual Orientation and Gender Identity",
13
- "2": "Investment Climate",
14
- "3": "Environment",
15
- "4": "Water Resources Management",
16
- "5": "Labor Markets",
17
- "6": "Governance",
18
- "7": "Competition Policy",
19
- "8": "Long-Term Finance",
20
- "9": "Food Security Update",
21
- "10": "Social Protection",
22
- "11": "Inequality and Shared Prosperity",
23
- "12": "Infectious diseases and Vaccines",
24
- "13": "Jobs and Development",
25
- "14": "Energy",
26
- "15": "Education and Technology",
27
- "16": "Debt Relief",
28
- "17": "Financial Sector",
29
- "18": "Measuring Poverty",
30
- "19": "Education",
31
- "20": "Water",
32
- "21": "Procurement for Development",
33
- "22": "Livestock and Sustainability",
34
- "23": "Disaster Risk Management",
35
- "24": "Financial Inclusion",
36
- "25": "Trade Facilitation and Logistics",
37
- "26": "Financial Integrity",
38
- "27": "Health",
39
- "28": "Regional Integration",
40
- "29": "Forests and Landscapes",
41
- "30": "Jobs & Development",
42
- "31": "Payment Systems",
43
- "32": "Agribusiness and Value Chains",
44
- "33": "Disability Inclusion",
45
- "34": "Marine Plastic Pollution",
46
- "35": "Global Financing Facility for Women, Children and Adolescents",
47
- "36": "Skills Development",
48
- "37": "Sustainable Infrastructure Finance",
49
- "38": "Land",
50
- "39": "Sustainable Communities",
51
- "40": "Higher Education",
52
- "41": "Climate Change",
53
- "42": "Small and Medium Enterprises Finance",
54
- "43": "Teachers",
55
- "44": "Migration",
56
- "45": "Debt",
57
- "46": "Urban Development",
58
- "47": "Mining Investment and Governance Review",
59
- "48": "Innovation and Entrepreneurship",
60
- "49": "Natural Capital",
61
- "50": "Transport",
62
- "51": "Gender",
63
- "52": "Safety Nets and Cash Transfers",
64
- "53": "Inclusive Cities",
65
- "54": "Extractive Industries",
66
- "55": "Global Value Chains",
67
- "56": "Agriculture and Food",
68
- "57": "Universal Health Coverage",
69
- "58": "Sanitation",
70
- "59": "Gas Flaring Reduction",
71
- "60": "COVID-19 Hub",
72
- "61": "Girls' Education",
73
- "62": "Macroeconomics",
74
- "63": "Trade",
75
- "64": "Pandemic Preparedness and COVID-19",
76
- "65": "Infrastructure",
77
- "66": "Oceans, Fisheries, and Coastal Economies",
78
- "67": "Social Sustainability and Inclusion",
79
- "68": "Community-Driven Development",
80
- "69": "Credit Infrastructure",
81
- "70": "Pollution",
82
- "71": "Pensions",
83
- "72": "Financial Stability",
84
- "73": "One Health",
85
- "74": "Indigenous Peoples",
86
- "75": "Digital Development",
87
- "76": "Climate-Smart Agriculture",
88
- "77": "Food System Jobs",
89
- "78": "Early Childhood Development",
90
- "79": "Biodiversity",
91
- "80": "Poverty",
92
- "81": "Fragility, Conflict, and Violence",
93
- "82": "Competitiveness",
94
- "83": "Social Inclusion",
95
- "84": "Nutrition",
96
- "85": "Taxes and Government Revenue",
97
- "86": "Water in Agriculture"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
- "Agribusiness and Value Chains": 32,
103
- "Agriculture and Food": 56,
104
- "Biodiversity": 79,
105
- "COVID-19 Hub": 60,
106
- "Climate Change": 41,
107
- "Climate-Smart Agriculture": 76,
108
- "Community-Driven Development": 68,
109
- "Competition Policy": 7,
110
- "Competitiveness": 82,
111
- "Credit Infrastructure": 69,
112
- "Debt": 45,
113
- "Debt Relief": 16,
114
- "Digital Development": 75,
115
- "Disability Inclusion": 33,
116
- "Disaster Risk Management": 23,
117
- "Early Childhood Development": 78,
118
- "Education": 19,
119
- "Education and Technology": 15,
120
- "Energy": 14,
121
- "Environment": 3,
122
- "Extractive Industries": 54,
123
- "Financial Inclusion": 24,
124
- "Financial Integrity": 26,
125
- "Financial Sector": 17,
126
- "Financial Stability": 72,
127
- "Food Security Update": 9,
128
- "Food System Jobs": 77,
129
- "Forests and Landscapes": 29,
130
- "Fragility, Conflict, and Violence": 81,
131
- "Gas Flaring Reduction": 59,
132
- "Gender": 51,
133
- "Girls' Education": 61,
134
- "Global Financing Facility for Women, Children and Adolescents": 35,
135
- "Global Value Chains": 55,
136
- "Governance": 6,
137
- "Health": 27,
138
- "Higher Education": 40,
139
- "Inclusive Cities": 53,
140
- "Indigenous Peoples": 74,
141
- "Inequality and Shared Prosperity": 11,
142
- "Infectious diseases and Vaccines": 12,
143
- "Infrastructure": 65,
144
- "Innovation and Entrepreneurship": 48,
145
- "Investment Climate": 2,
146
- "Jobs & Development": 30,
147
- "Jobs and Development": 13,
148
- "Labor Markets": 5,
149
- "Land": 38,
150
- "Livestock and Sustainability": 22,
151
- "Long-Term Finance": 8,
152
- "Macroeconomics": 62,
153
- "Marine Plastic Pollution": 34,
154
- "Measuring Poverty": 18,
155
- "Migration": 44,
156
- "Mining Investment and Governance Review": 47,
157
- "Natural Capital": 49,
158
- "Nutrition": 84,
159
- "Oceans, Fisheries, and Coastal Economies": 66,
160
- "One Health": 73,
161
- "Pandemic Preparedness and COVID-19": 64,
162
- "Payment Systems": 31,
163
- "Pensions": 71,
164
- "Pollution": 70,
165
- "Poverty": 80,
166
- "Procurement for Development": 21,
167
- "Regional Integration": 28,
168
- "Safety Nets and Cash Transfers": 52,
169
- "Sanitation": 58,
170
- "Sexual Orientation and Gender Identity": 1,
171
- "Skills Development": 36,
172
- "Small and Medium Enterprises Finance": 42,
173
- "Social Inclusion": 83,
174
- "Social Protection": 10,
175
- "Social Sustainability and Inclusion": 67,
176
- "Sustainable Communities": 39,
177
- "Sustainable Infrastructure Finance": 37,
178
- "Taxes and Government Revenue": 85,
179
- "Teachers": 43,
180
- "Trade": 63,
181
- "Trade Facilitation and Logistics": 25,
182
- "Transport": 50,
183
- "Universal Health Coverage": 57,
184
- "Urban Development": 46,
185
- "Water": 20,
186
- "Water Resources Management": 4,
187
- "Water Supply": 0,
188
- "Water in Agriculture": 86
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
 
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "Global Financing Facility for Women, Children and Adolescents",
12
+ "1": "Competitiveness",
13
+ "2": "Sustainable Communities",
14
+ "3": "Education",
15
+ "4": "Natural Capital",
16
+ "5": "Regional Integration",
17
+ "6": "Jobs and Development",
18
+ "7": "Innovation and Entrepreneurship",
19
+ "8": "Inclusive Cities",
20
+ "9": "Jobs & Development",
21
+ "10": "Transport",
22
+ "11": "Sustainable Infrastructure Finance",
23
+ "12": "Pandemic Preparedness and COVID-19",
24
+ "13": "Taxes and Government Revenue",
25
+ "14": "Labor Markets",
26
+ "15": "Measuring Poverty",
27
+ "16": "Food Security Update",
28
+ "17": "Disability Inclusion",
29
+ "18": "Agriculture and Food",
30
+ "19": "Trade Facilitation and Logistics",
31
+ "20": "Social Protection",
32
+ "21": "One Health",
33
+ "22": "Biodiversity",
34
+ "23": "Social Inclusion",
35
+ "24": "Credit Infrastructure",
36
+ "25": "Water Supply",
37
+ "26": "Early Childhood Development",
38
+ "27": "Food System Jobs",
39
+ "28": "Migration",
40
+ "29": "Indigenous Peoples",
41
+ "30": "Universal Health Coverage",
42
+ "31": "Financial Sector",
43
+ "32": "Procurement for Development",
44
+ "33": "Inequality and Shared Prosperity",
45
+ "34": "COVID-19 Hub",
46
+ "35": "Poverty",
47
+ "36": "Financial Stability",
48
+ "37": "Digital Development",
49
+ "38": "Long-Term Finance",
50
+ "39": "Gas Flaring Reduction",
51
+ "40": "Mining Investment and Governance Review",
52
+ "41": "Small and Medium Enterprises Finance",
53
+ "42": "Infrastructure",
54
+ "43": "Health",
55
+ "44": "Sexual Orientation and Gender Identity",
56
+ "45": "Nutrition",
57
+ "46": "Financial Inclusion",
58
+ "47": "Fragility, Conflict, and Violence",
59
+ "48": "Debt Relief",
60
+ "49": "Disaster Risk Management",
61
+ "50": "Water in Agriculture",
62
+ "51": "Livestock and Sustainability",
63
+ "52": "Global Value Chains",
64
+ "53": "Competition Policy",
65
+ "54": "Pollution",
66
+ "55": "Urban Development",
67
+ "56": "Gender",
68
+ "57": "Safety Nets and Cash Transfers",
69
+ "58": "Forests and Landscapes",
70
+ "59": "Water Resources Management",
71
+ "60": "Extractive Industries",
72
+ "61": "Social Sustainability and Inclusion",
73
+ "62": "Energy",
74
+ "63": "Girls' Education",
75
+ "64": "Environment",
76
+ "65": "Marine Plastic Pollution",
77
+ "66": "Education and Technology",
78
+ "67": "Financial Integrity",
79
+ "68": "Oceans, Fisheries, and Coastal Economies",
80
+ "69": "Sanitation",
81
+ "70": "Land",
82
+ "71": "Higher Education",
83
+ "72": "Teachers",
84
+ "73": "Investment Climate",
85
+ "74": "Debt",
86
+ "75": "Climate Change",
87
+ "76": "Trade",
88
+ "77": "Skills Development",
89
+ "78": "Agribusiness and Value Chains",
90
+ "79": "Climate-Smart Agriculture",
91
+ "80": "Pensions",
92
+ "81": "Infectious diseases and Vaccines",
93
+ "82": "Payment Systems",
94
+ "83": "Community-Driven Development",
95
+ "84": "Water",
96
+ "85": "Governance",
97
+ "86": "Macroeconomics"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
+ "Agribusiness and Value Chains": 78,
103
+ "Agriculture and Food": 18,
104
+ "Biodiversity": 22,
105
+ "COVID-19 Hub": 34,
106
+ "Climate Change": 75,
107
+ "Climate-Smart Agriculture": 79,
108
+ "Community-Driven Development": 83,
109
+ "Competition Policy": 53,
110
+ "Competitiveness": 1,
111
+ "Credit Infrastructure": 24,
112
+ "Debt": 74,
113
+ "Debt Relief": 48,
114
+ "Digital Development": 37,
115
+ "Disability Inclusion": 17,
116
+ "Disaster Risk Management": 49,
117
+ "Early Childhood Development": 26,
118
+ "Education": 3,
119
+ "Education and Technology": 66,
120
+ "Energy": 62,
121
+ "Environment": 64,
122
+ "Extractive Industries": 60,
123
+ "Financial Inclusion": 46,
124
+ "Financial Integrity": 67,
125
+ "Financial Sector": 31,
126
+ "Financial Stability": 36,
127
+ "Food Security Update": 16,
128
+ "Food System Jobs": 27,
129
+ "Forests and Landscapes": 58,
130
+ "Fragility, Conflict, and Violence": 47,
131
+ "Gas Flaring Reduction": 39,
132
+ "Gender": 56,
133
+ "Girls' Education": 63,
134
+ "Global Financing Facility for Women, Children and Adolescents": 0,
135
+ "Global Value Chains": 52,
136
+ "Governance": 85,
137
+ "Health": 43,
138
+ "Higher Education": 71,
139
+ "Inclusive Cities": 8,
140
+ "Indigenous Peoples": 29,
141
+ "Inequality and Shared Prosperity": 33,
142
+ "Infectious diseases and Vaccines": 81,
143
+ "Infrastructure": 42,
144
+ "Innovation and Entrepreneurship": 7,
145
+ "Investment Climate": 73,
146
+ "Jobs & Development": 9,
147
+ "Jobs and Development": 6,
148
+ "Labor Markets": 14,
149
+ "Land": 70,
150
+ "Livestock and Sustainability": 51,
151
+ "Long-Term Finance": 38,
152
+ "Macroeconomics": 86,
153
+ "Marine Plastic Pollution": 65,
154
+ "Measuring Poverty": 15,
155
+ "Migration": 28,
156
+ "Mining Investment and Governance Review": 40,
157
+ "Natural Capital": 4,
158
+ "Nutrition": 45,
159
+ "Oceans, Fisheries, and Coastal Economies": 68,
160
+ "One Health": 21,
161
+ "Pandemic Preparedness and COVID-19": 12,
162
+ "Payment Systems": 82,
163
+ "Pensions": 80,
164
+ "Pollution": 54,
165
+ "Poverty": 35,
166
+ "Procurement for Development": 32,
167
+ "Regional Integration": 5,
168
+ "Safety Nets and Cash Transfers": 57,
169
+ "Sanitation": 69,
170
+ "Sexual Orientation and Gender Identity": 44,
171
+ "Skills Development": 77,
172
+ "Small and Medium Enterprises Finance": 41,
173
+ "Social Inclusion": 23,
174
+ "Social Protection": 20,
175
+ "Social Sustainability and Inclusion": 61,
176
+ "Sustainable Communities": 2,
177
+ "Sustainable Infrastructure Finance": 11,
178
+ "Taxes and Government Revenue": 13,
179
+ "Teachers": 72,
180
+ "Trade": 76,
181
+ "Trade Facilitation and Logistics": 19,
182
+ "Transport": 10,
183
+ "Universal Health Coverage": 30,
184
+ "Urban Development": 55,
185
+ "Water": 84,
186
+ "Water Resources Management": 59,
187
+ "Water Supply": 25,
188
+ "Water in Agriculture": 50
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c7c809bfb9e0c09a991cbc751afbd7d153af1ea8be3d21c60d821e480d58397
3
  size 567860028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56824edd4761257a415044881a1993322b91ee41909153e8164b07d4cdb8e299
3
  size 567860028
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54c71b342dcaf811b4743acd66cc3b8dab74d01a77947a9e074165554a191a3f
3
  size 1135783354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d705b23d30e8d777f2c745aee65fc860e7de16b51a91182b36a495553f35f3
3
  size 1135783354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c6930021eee2ce84b130095d62c41e90293ccc2830acaf7c168280128912a1f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34649568145f465443beb487f2295ab3dad9e5f49f758646dd823029413e18fe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe099e1b5028bec45c9b921f52c62bf9191fdd92a0c19da1f44a14388797be1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e136870ce715682b288c29fb4d05a0aec61f1a74b2cc393d7fb9e66e4965261
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,24 +1,24 @@
1
  {
2
- "best_metric": 0.03578707203269005,
3
- "best_model_checkpoint": "doc-topic-model_eval-00_train-01/checkpoint-20000",
4
- "epoch": 9.861932938856016,
5
  "eval_steps": 1000,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.2465483234714004,
13
- "grad_norm": 0.32120612263679504,
14
- "learning_rate": 1.95069033530572e-05,
15
- "loss": 0.1666,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.4930966469428008,
20
- "grad_norm": 0.33755889534950256,
21
- "learning_rate": 1.90138067061144e-05,
22
  "loss": 0.0936,
23
  "step": 1000
24
  },
@@ -26,515 +26,30 @@
26
  "epoch": 0.4930966469428008,
27
  "eval_accuracy": 0.9814660487265615,
28
  "eval_f1": 0.0,
29
- "eval_loss": 0.08871060609817505,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
- "eval_runtime": 26.4203,
33
- "eval_samples_per_second": 306.961,
34
- "eval_steps_per_second": 19.19,
35
  "step": 1000
36
- },
37
- {
38
- "epoch": 0.7396449704142012,
39
- "grad_norm": 0.38695186376571655,
40
- "learning_rate": 1.85207100591716e-05,
41
- "loss": 0.0861,
42
- "step": 1500
43
- },
44
- {
45
- "epoch": 0.9861932938856016,
46
- "grad_norm": 0.39813894033432007,
47
- "learning_rate": 1.80276134122288e-05,
48
- "loss": 0.0774,
49
- "step": 2000
50
- },
51
- {
52
- "epoch": 0.9861932938856016,
53
- "eval_accuracy": 0.9814660487265615,
54
- "eval_f1": 0.0,
55
- "eval_loss": 0.06950555741786957,
56
- "eval_precision": 0.0,
57
- "eval_recall": 0.0,
58
- "eval_runtime": 25.7659,
59
- "eval_samples_per_second": 314.757,
60
- "eval_steps_per_second": 19.677,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 1.232741617357002,
65
- "grad_norm": 0.452305406332016,
66
- "learning_rate": 1.7534516765285997e-05,
67
- "loss": 0.0685,
68
- "step": 2500
69
- },
70
- {
71
- "epoch": 1.4792899408284024,
72
- "grad_norm": 0.46256884932518005,
73
- "learning_rate": 1.70414201183432e-05,
74
- "loss": 0.0622,
75
- "step": 3000
76
- },
77
- {
78
- "epoch": 1.4792899408284024,
79
- "eval_accuracy": 0.9821973723372592,
80
- "eval_f1": 0.09950534088465122,
81
- "eval_loss": 0.05685883387923241,
82
- "eval_precision": 0.7958715596330275,
83
- "eval_recall": 0.05307027605719966,
84
- "eval_runtime": 26.3616,
85
- "eval_samples_per_second": 307.644,
86
- "eval_steps_per_second": 19.232,
87
- "step": 3000
88
- },
89
- {
90
- "epoch": 1.725838264299803,
91
- "grad_norm": 0.35877692699432373,
92
- "learning_rate": 1.6548323471400396e-05,
93
- "loss": 0.0579,
94
- "step": 3500
95
- },
96
- {
97
- "epoch": 1.972386587771203,
98
- "grad_norm": 0.43726620078086853,
99
- "learning_rate": 1.6055226824457594e-05,
100
- "loss": 0.0529,
101
- "step": 4000
102
- },
103
- {
104
- "epoch": 1.972386587771203,
105
- "eval_accuracy": 0.9842609521379877,
106
- "eval_f1": 0.3101198981176616,
107
- "eval_loss": 0.050210438668727875,
108
- "eval_precision": 0.8264900662251655,
109
- "eval_recall": 0.1908694654737325,
110
- "eval_runtime": 26.3196,
111
- "eval_samples_per_second": 308.135,
112
- "eval_steps_per_second": 19.263,
113
- "step": 4000
114
- },
115
- {
116
- "epoch": 2.2189349112426036,
117
- "grad_norm": 0.2717958092689514,
118
- "learning_rate": 1.5562130177514792e-05,
119
- "loss": 0.0495,
120
- "step": 4500
121
- },
122
- {
123
- "epoch": 2.465483234714004,
124
- "grad_norm": 0.4179055690765381,
125
- "learning_rate": 1.5069033530571993e-05,
126
- "loss": 0.0475,
127
- "step": 5000
128
- },
129
- {
130
- "epoch": 2.465483234714004,
131
- "eval_accuracy": 0.9854528962399195,
132
- "eval_f1": 0.4501232186863817,
133
- "eval_loss": 0.04653949663043022,
134
- "eval_precision": 0.7516550366791913,
135
- "eval_recall": 0.32125105146440314,
136
- "eval_runtime": 26.3415,
137
- "eval_samples_per_second": 307.879,
138
- "eval_steps_per_second": 19.247,
139
- "step": 5000
140
- },
141
- {
142
- "epoch": 2.712031558185404,
143
- "grad_norm": 0.3518264591693878,
144
- "learning_rate": 1.4575936883629191e-05,
145
- "loss": 0.0457,
146
- "step": 5500
147
- },
148
- {
149
- "epoch": 2.9585798816568047,
150
- "grad_norm": 0.4679907560348511,
151
- "learning_rate": 1.4082840236686392e-05,
152
- "loss": 0.0444,
153
- "step": 6000
154
- },
155
- {
156
- "epoch": 2.9585798816568047,
157
- "eval_accuracy": 0.9860665844636252,
158
- "eval_f1": 0.4852610084297608,
159
- "eval_loss": 0.043688371777534485,
160
- "eval_precision": 0.7695117901029558,
161
- "eval_recall": 0.3543626213963447,
162
- "eval_runtime": 26.4572,
163
- "eval_samples_per_second": 306.533,
164
- "eval_steps_per_second": 19.163,
165
- "step": 6000
166
- },
167
- {
168
- "epoch": 3.2051282051282053,
169
- "grad_norm": 0.21664711833000183,
170
- "learning_rate": 1.3589743589743592e-05,
171
- "loss": 0.0422,
172
- "step": 6500
173
- },
174
- {
175
- "epoch": 3.4516765285996054,
176
- "grad_norm": 0.33109939098358154,
177
- "learning_rate": 1.309664694280079e-05,
178
- "loss": 0.0394,
179
- "step": 7000
180
- },
181
- {
182
- "epoch": 3.4516765285996054,
183
- "eval_accuracy": 0.9865711410632538,
184
- "eval_f1": 0.5374664388577007,
185
- "eval_loss": 0.04136768355965614,
186
- "eval_precision": 0.7431155507559395,
187
- "eval_recall": 0.4209681119522826,
188
- "eval_runtime": 26.4486,
189
- "eval_samples_per_second": 306.632,
190
- "eval_steps_per_second": 19.169,
191
- "step": 7000
192
- },
193
- {
194
- "epoch": 3.698224852071006,
195
- "grad_norm": 0.3816561996936798,
196
- "learning_rate": 1.2603550295857989e-05,
197
- "loss": 0.0403,
198
- "step": 7500
199
- },
200
- {
201
- "epoch": 3.9447731755424065,
202
- "grad_norm": 0.44973593950271606,
203
- "learning_rate": 1.2110453648915189e-05,
204
- "loss": 0.039,
205
- "step": 8000
206
- },
207
- {
208
- "epoch": 3.9447731755424065,
209
- "eval_accuracy": 0.9868616862961861,
210
- "eval_f1": 0.5516540917005224,
211
- "eval_loss": 0.040279507637023926,
212
- "eval_precision": 0.750493485984998,
213
- "eval_recall": 0.43610919935765086,
214
- "eval_runtime": 27.0585,
215
- "eval_samples_per_second": 299.721,
216
- "eval_steps_per_second": 18.737,
217
- "step": 8000
218
- },
219
- {
220
- "epoch": 4.191321499013807,
221
- "grad_norm": 0.43002060055732727,
222
- "learning_rate": 1.1617357001972386e-05,
223
- "loss": 0.0363,
224
- "step": 8500
225
- },
226
- {
227
- "epoch": 4.437869822485207,
228
- "grad_norm": 0.3201405704021454,
229
- "learning_rate": 1.1124260355029586e-05,
230
- "loss": 0.0356,
231
- "step": 9000
232
- },
233
- {
234
- "epoch": 4.437869822485207,
235
- "eval_accuracy": 0.9870360134359454,
236
- "eval_f1": 0.5708655876143561,
237
- "eval_loss": 0.03906617686152458,
238
- "eval_precision": 0.7385287691187181,
239
- "eval_recall": 0.465244322092223,
240
- "eval_runtime": 26.4636,
241
- "eval_samples_per_second": 306.458,
242
- "eval_steps_per_second": 19.158,
243
- "step": 9000
244
- },
245
- {
246
- "epoch": 4.684418145956608,
247
- "grad_norm": 0.47237107157707214,
248
- "learning_rate": 1.0631163708086787e-05,
249
- "loss": 0.0359,
250
- "step": 9500
251
- },
252
- {
253
- "epoch": 4.930966469428008,
254
- "grad_norm": 0.5127544403076172,
255
- "learning_rate": 1.0138067061143987e-05,
256
- "loss": 0.0346,
257
- "step": 10000
258
- },
259
- {
260
- "epoch": 4.930966469428008,
261
- "eval_accuracy": 0.9873010473801324,
262
- "eval_f1": 0.5763593380614657,
263
- "eval_loss": 0.03822428733110428,
264
- "eval_precision": 0.7549857549857549,
265
- "eval_recall": 0.46608549361474344,
266
- "eval_runtime": 26.8486,
267
- "eval_samples_per_second": 302.064,
268
- "eval_steps_per_second": 18.884,
269
- "step": 10000
270
- },
271
- {
272
- "epoch": 5.177514792899408,
273
- "grad_norm": 0.2535999119281769,
274
- "learning_rate": 9.644970414201184e-06,
275
- "loss": 0.0321,
276
- "step": 10500
277
- },
278
- {
279
- "epoch": 5.424063116370808,
280
- "grad_norm": 0.3756001591682434,
281
- "learning_rate": 9.151873767258384e-06,
282
- "loss": 0.0317,
283
- "step": 11000
284
- },
285
- {
286
- "epoch": 5.424063116370808,
287
- "eval_accuracy": 0.9874201000609436,
288
- "eval_f1": 0.5807670508218401,
289
- "eval_loss": 0.03766867518424988,
290
- "eval_precision": 0.7594811612106238,
291
- "eval_recall": 0.47013841095052383,
292
- "eval_runtime": 27.4901,
293
- "eval_samples_per_second": 295.015,
294
- "eval_steps_per_second": 18.443,
295
- "step": 11000
296
- },
297
- {
298
- "epoch": 5.670611439842209,
299
- "grad_norm": 0.3415575325489044,
300
- "learning_rate": 8.658777120315582e-06,
301
- "loss": 0.0325,
302
- "step": 11500
303
- },
304
- {
305
- "epoch": 5.9171597633136095,
306
- "grad_norm": 0.37248894572257996,
307
- "learning_rate": 8.165680473372781e-06,
308
- "loss": 0.0308,
309
- "step": 12000
310
- },
311
- {
312
- "epoch": 5.9171597633136095,
313
- "eval_accuracy": 0.9873846677154641,
314
- "eval_f1": 0.5882788288079929,
315
- "eval_loss": 0.0372396819293499,
316
- "eval_precision": 0.7444392413954577,
317
- "eval_recall": 0.4862736101552344,
318
- "eval_runtime": 26.4199,
319
- "eval_samples_per_second": 306.965,
320
- "eval_steps_per_second": 19.19,
321
- "step": 12000
322
- },
323
- {
324
- "epoch": 6.16370808678501,
325
- "grad_norm": 0.47570303082466125,
326
- "learning_rate": 7.67258382642998e-06,
327
- "loss": 0.0292,
328
- "step": 12500
329
- },
330
- {
331
- "epoch": 6.410256410256411,
332
- "grad_norm": 0.4004118740558624,
333
- "learning_rate": 7.17948717948718e-06,
334
- "loss": 0.0288,
335
- "step": 13000
336
- },
337
- {
338
- "epoch": 6.410256410256411,
339
- "eval_accuracy": 0.9875915926130646,
340
- "eval_f1": 0.5967110415035239,
341
- "eval_loss": 0.036796875298023224,
342
- "eval_precision": 0.7503475440222428,
343
- "eval_recall": 0.4952970864877265,
344
- "eval_runtime": 26.3632,
345
- "eval_samples_per_second": 307.626,
346
- "eval_steps_per_second": 19.231,
347
- "step": 13000
348
- },
349
- {
350
- "epoch": 6.65680473372781,
351
- "grad_norm": 0.32691922783851624,
352
- "learning_rate": 6.686390532544379e-06,
353
- "loss": 0.0279,
354
- "step": 13500
355
- },
356
- {
357
- "epoch": 6.903353057199211,
358
- "grad_norm": 0.3402620553970337,
359
- "learning_rate": 6.193293885601579e-06,
360
- "loss": 0.0284,
361
- "step": 14000
362
- },
363
- {
364
- "epoch": 6.903353057199211,
365
- "eval_accuracy": 0.9876780475360347,
366
- "eval_f1": 0.6049259292920113,
367
- "eval_loss": 0.036554474383592606,
368
- "eval_precision": 0.745436219061485,
369
- "eval_recall": 0.5089852412632867,
370
- "eval_runtime": 26.4132,
371
- "eval_samples_per_second": 307.043,
372
- "eval_steps_per_second": 19.195,
373
- "step": 14000
374
- },
375
- {
376
- "epoch": 7.149901380670611,
377
- "grad_norm": 0.33896222710609436,
378
- "learning_rate": 5.700197238658778e-06,
379
- "loss": 0.0272,
380
- "step": 14500
381
- },
382
- {
383
- "epoch": 7.396449704142012,
384
- "grad_norm": 0.5163460373878479,
385
- "learning_rate": 5.207100591715976e-06,
386
- "loss": 0.026,
387
- "step": 15000
388
- },
389
- {
390
- "epoch": 7.396449704142012,
391
- "eval_accuracy": 0.9877928483353884,
392
- "eval_f1": 0.6092991608074394,
393
- "eval_loss": 0.0364510752260685,
394
- "eval_precision": 0.7488849241748439,
395
- "eval_recall": 0.5135734495679437,
396
- "eval_runtime": 26.576,
397
- "eval_samples_per_second": 305.162,
398
- "eval_steps_per_second": 19.077,
399
- "step": 15000
400
- },
401
- {
402
- "epoch": 7.642998027613412,
403
- "grad_norm": 0.29480230808258057,
404
- "learning_rate": 4.714003944773176e-06,
405
- "loss": 0.0262,
406
- "step": 15500
407
- },
408
- {
409
- "epoch": 7.889546351084813,
410
- "grad_norm": 0.37596189975738525,
411
- "learning_rate": 4.220907297830375e-06,
412
- "loss": 0.0268,
413
- "step": 16000
414
- },
415
- {
416
- "epoch": 7.889546351084813,
417
- "eval_accuracy": 0.9879019799594654,
418
- "eval_f1": 0.6212282570110046,
419
- "eval_loss": 0.036032263189554214,
420
- "eval_precision": 0.7400359446030236,
421
- "eval_recall": 0.5352909688766536,
422
- "eval_runtime": 26.3511,
423
- "eval_samples_per_second": 307.767,
424
- "eval_steps_per_second": 19.24,
425
- "step": 16000
426
- },
427
- {
428
- "epoch": 8.136094674556213,
429
- "grad_norm": 0.1833580881357193,
430
- "learning_rate": 3.7278106508875745e-06,
431
- "loss": 0.025,
432
- "step": 16500
433
- },
434
- {
435
- "epoch": 8.382642998027613,
436
- "grad_norm": 0.4305359125137329,
437
- "learning_rate": 3.234714003944773e-06,
438
- "loss": 0.0248,
439
- "step": 17000
440
- },
441
- {
442
- "epoch": 8.382642998027613,
443
- "eval_accuracy": 0.9878098558612186,
444
- "eval_f1": 0.6178861788617886,
445
- "eval_loss": 0.03607296571135521,
446
- "eval_precision": 0.7372773536895675,
447
- "eval_recall": 0.53177334250975,
448
- "eval_runtime": 26.3987,
449
- "eval_samples_per_second": 307.212,
450
- "eval_steps_per_second": 19.205,
451
- "step": 17000
452
- },
453
- {
454
- "epoch": 8.629191321499015,
455
- "grad_norm": 0.4935995936393738,
456
- "learning_rate": 2.7416173570019726e-06,
457
- "loss": 0.0243,
458
- "step": 17500
459
- },
460
- {
461
- "epoch": 8.875739644970414,
462
- "grad_norm": 0.5695288181304932,
463
- "learning_rate": 2.2485207100591717e-06,
464
- "loss": 0.0247,
465
- "step": 18000
466
- },
467
- {
468
- "epoch": 8.875739644970414,
469
- "eval_accuracy": 0.9878693822016242,
470
- "eval_f1": 0.6174748603351955,
471
- "eval_loss": 0.035977210849523544,
472
- "eval_precision": 0.7429554742955474,
473
- "eval_recall": 0.5282557161428462,
474
- "eval_runtime": 26.442,
475
- "eval_samples_per_second": 306.709,
476
- "eval_steps_per_second": 19.174,
477
- "step": 18000
478
- },
479
- {
480
- "epoch": 9.122287968441814,
481
- "grad_norm": 0.3517369031906128,
482
- "learning_rate": 1.755424063116371e-06,
483
- "loss": 0.0238,
484
- "step": 18500
485
- },
486
- {
487
- "epoch": 9.368836291913215,
488
- "grad_norm": 0.5258517265319824,
489
- "learning_rate": 1.2623274161735703e-06,
490
- "loss": 0.0236,
491
- "step": 19000
492
- },
493
- {
494
- "epoch": 9.368836291913215,
495
- "eval_accuracy": 0.9880196153464574,
496
- "eval_f1": 0.6215357062905753,
497
- "eval_loss": 0.035947080701589584,
498
- "eval_precision": 0.7497299632750054,
499
- "eval_recall": 0.5307792307104076,
500
- "eval_runtime": 26.2793,
501
- "eval_samples_per_second": 308.608,
502
- "eval_steps_per_second": 19.293,
503
- "step": 19000
504
- },
505
- {
506
- "epoch": 9.615384615384615,
507
- "grad_norm": 0.4185117185115814,
508
- "learning_rate": 7.692307692307694e-07,
509
- "loss": 0.0233,
510
- "step": 19500
511
- },
512
- {
513
- "epoch": 9.861932938856016,
514
- "grad_norm": 0.34625717997550964,
515
- "learning_rate": 2.7613412228796843e-07,
516
- "loss": 0.0229,
517
- "step": 20000
518
- },
519
- {
520
- "epoch": 9.861932938856016,
521
- "eval_accuracy": 0.987977096531882,
522
- "eval_f1": 0.6214467401490472,
523
- "eval_loss": 0.03578707203269005,
524
- "eval_precision": 0.7461423060437206,
525
- "eval_recall": 0.5324615737554484,
526
- "eval_runtime": 27.6359,
527
- "eval_samples_per_second": 293.458,
528
- "eval_steps_per_second": 18.346,
529
- "step": 20000
530
  }
531
  ],
532
  "logging_steps": 500,
533
- "max_steps": 20280,
534
  "num_input_tokens_seen": 0,
535
- "num_train_epochs": 10,
536
  "save_steps": 1000,
537
  "stateful_callbacks": {
 
 
 
 
 
 
 
 
 
538
  "TrainerControl": {
539
  "args": {
540
  "should_epoch_stop": false,
@@ -546,7 +61,7 @@
546
  "attributes": {}
547
  }
548
  },
549
- "total_flos": 452312037970056.0,
550
  "train_batch_size": 4,
551
  "trial_name": null,
552
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08824141323566437,
3
+ "best_model_checkpoint": "doc-topic-model_eval-00_train-01/checkpoint-1000",
4
+ "epoch": 0.4930966469428008,
5
  "eval_steps": 1000,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.2465483234714004,
13
+ "grad_norm": 0.32877257466316223,
14
+ "learning_rate": 1.9950690335305722e-05,
15
+ "loss": 0.166,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.4930966469428008,
20
+ "grad_norm": 0.3466370701789856,
21
+ "learning_rate": 1.9901380670611442e-05,
22
  "loss": 0.0936,
23
  "step": 1000
24
  },
 
26
  "epoch": 0.4930966469428008,
27
  "eval_accuracy": 0.9814660487265615,
28
  "eval_f1": 0.0,
29
+ "eval_loss": 0.08824141323566437,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
+ "eval_runtime": 11.7844,
33
+ "eval_samples_per_second": 688.2,
34
+ "eval_steps_per_second": 2.715,
35
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
  "logging_steps": 500,
39
+ "max_steps": 202800,
40
  "num_input_tokens_seen": 0,
41
+ "num_train_epochs": 100,
42
  "save_steps": 1000,
43
  "stateful_callbacks": {
44
+ "EarlyStoppingCallback": {
45
+ "args": {
46
+ "early_stopping_patience": 5,
47
+ "early_stopping_threshold": 0.0
48
+ },
49
+ "attributes": {
50
+ "early_stopping_patience_counter": 0
51
+ }
52
+ },
53
  "TrainerControl": {
54
  "args": {
55
  "should_epoch_stop": false,
 
61
  "attributes": {}
62
  }
63
  },
64
+ "total_flos": 22728529355328.0,
65
  "train_batch_size": 4,
66
  "trial_name": null,
67
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71f491aa3c6c9aaa72371007033b32fdf24de876c9ab31ea4ee1caa9108217ee
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3238ded44964259c0f3714028dd1f6d78a9114d80b100ba672efc9a29ee50e3
3
  size 5240