liwii committed on
Commit 947a99a · verified · 1 Parent(s): f3f9491

Training in progress, epoch 2

pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:686fef64a02fcd9ed2e45d039710a9a9f082d3926dced4c99dc690869a8e0537
+oid sha256:390a1376b55d0a5cdb115d69efa353749e7692da857a69abd0246f3d656cf9af
 size 274758317
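The hunk above only swaps the Git LFS pointer: the checkpoint blob itself was replaced, and the pointer's oid records the SHA-256 of the new file. A minimal sketch (not part of this commit) of how one could check a locally downloaded pytorch_model.bin against the new oid; the local path is an assumption.

```python
# Minimal sketch (not part of this commit): verify a downloaded
# pytorch_model.bin against the sha256 oid recorded in the LFS pointer above.
# The local path is an assumption; adjust it to wherever the file was saved.
import hashlib

EXPECTED_OID = "390a1376b55d0a5cdb115d69efa353749e7692da857a69abd0246f3d656cf9af"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks so large checkpoints are not loaded into memory at once."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of("pytorch_model.bin") == EXPECTED_OID, "checkpoint does not match the LFS pointer"
```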
train_factual_consistency.ipynb CHANGED
@@ -129,23 +129,14 @@
  "id": "6bc83d4c-378c-4313-b641-8ead0c02f715",
  "metadata": {},
  "outputs": [
- {
-  "name": "stderr",
-  "output_type": "stream",
-  "text": [
-   "WARNING:root:XRT configuration not detected. Defaulting to preview PJRT runtime. To silence this warning and continue using PJRT, explicitly set PJRT_DEVICE to a supported device or configure XRT. To disable default device selection, set PJRT_SELECT_DEFAULT_DEVICE=0\n",
-   "WARNING:root:For more information about the status of PJRT, see https://github.com/pytorch/xla/blob/master/docs/pjrt.md\n",
-   "WARNING:root:Defaulting to PJRT_DEVICE=CPU\n"
-  ]
- },
  {
   "data": {
    "text/html": [
     "\n",
     " <div>\n",
     " \n",
-    " <progress value='2449' max='9180' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-    " [2449/9180 15:00 < 41:17, 2.72 it/s, Epoch 8/30]\n",
+    " <progress value='613' max='918' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+    " [613/918 03:40 < 01:50, 2.77 it/s, Epoch 2/3]\n",
     " </div>\n",
     " <table border=\"1\" class=\"dataframe\">\n",
     " <thead>\n",
@@ -160,51 +151,15 @@
     " <tr>\n",
     " <td>1</td>\n",
     " <td>No log</td>\n",
-    " <td>0.292851</td>\n",
-    " <td>0.890625</td>\n",
-    " </tr>\n",
-    " <tr>\n",
-    " <td>2</td>\n",
-    " <td>0.417800</td>\n",
-    " <td>0.252162</td>\n",
-    " <td>0.890625</td>\n",
-    " </tr>\n",
-    " <tr>\n",
-    " <td>3</td>\n",
-    " <td>0.417800</td>\n",
-    " <td>0.206412</td>\n",
-    " <td>0.912109</td>\n",
-    " </tr>\n",
-    " <tr>\n",
-    " <td>4</td>\n",
-    " <td>0.189500</td>\n",
-    " <td>0.255287</td>\n",
-    " <td>0.906250</td>\n",
-    " </tr>\n",
-    " <tr>\n",
-    " <td>5</td>\n",
-    " <td>0.113500</td>\n",
-    " <td>0.273709</td>\n",
-    " <td>0.912109</td>\n",
-    " </tr>\n",
-    " <tr>\n",
-    " <td>6</td>\n",
-    " <td>0.113500</td>\n",
-    " <td>0.319531</td>\n",
-    " <td>0.904297</td>\n",
-    " </tr>\n",
-    " <tr>\n",
-    " <td>7</td>\n",
-    " <td>0.068500</td>\n",
-    " <td>0.381726</td>\n",
-    " <td>0.896484</td>\n",
+    " <td>0.283724</td>\n",
+    " <td>0.869141</td>\n",
     " </tr>\n",
     " </tbody>\n",
     "</table><p>\n",
     " <div>\n",
     " \n",
-    " <progress value='59' max='64' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-    " [59/64 00:05 < 00:00, 10.02 it/s]\n",
+    " <progress value='48' max='64' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+    " [48/64 00:05 < 00:01, 9.12 it/s]\n",
     " </div>\n",
     " "
    ],
@@ -225,7 +180,7 @@
    " learning_rate=1e-4,\n",
    " per_device_train_batch_size=64,\n",
    " per_device_eval_batch_size=8,\n",
-   " num_train_epochs=30,\n",
+   " num_train_epochs=3,\n",
    " weight_decay=0.02,\n",
    " evaluation_strategy=\"epoch\",\n",
    " eval_accumulation_steps=4,\n",
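The only functional change in the notebook is num_train_epochs dropping from 30 to 3; the rest of the diff is refreshed training output. A minimal sketch of the training configuration this hunk implies, assuming the Hugging Face transformers Trainer API; only the hyperparameters visible in the diff are taken from the notebook, and output_dir is a placeholder.

```python
# Minimal sketch (assumption: the notebook uses the transformers Trainer API).
# Only the hyperparameters visible in the diff come from the notebook;
# output_dir is a placeholder, and model/dataset wiring is omitted.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="factual-consistency",   # placeholder, not shown in the diff
    learning_rate=1e-4,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=8,
    num_train_epochs=3,                 # reduced from 30 in this commit
    weight_decay=0.02,
    evaluation_strategy="epoch",        # evaluate once per epoch, as in the output table above
    eval_accumulation_steps=4,
)
```

This is consistent with the progress bars in the diff: the old run planned 9180 steps over 30 epochs and the new one 918 over 3, i.e. 306 optimizer steps per epoch in both cases.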
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16a8fea2d112223bb5fc50f0e3b8457dcd3eefa65312f57e80712e85717a5f1f
+oid sha256:f5d46324e9f1183ab9f5cd775916c015c2346d0e87006d76736ec786cf5a5ccd
 size 4155