dannoncaffeine committed on
Commit
adec14e
·
1 Parent(s): 4f81d5a

feat: add notebook

Browse files
Files changed (1) hide show
  1. GPT2_124M_wikitext_v0_1.ipynb +911 -0
GPT2_124M_wikitext_v0_1.ipynb ADDED
@@ -0,0 +1,911 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU",
17
+ "widgets": {
18
+ "application/vnd.jupyter.widget-state+json": {
19
+ "72d3f33c56a14b01bef05ea2ae98e72f": {
20
+ "model_module": "@jupyter-widgets/controls",
21
+ "model_name": "VBoxModel",
22
+ "model_module_version": "1.5.0",
23
+ "state": {
24
+ "_dom_classes": [],
25
+ "_model_module": "@jupyter-widgets/controls",
26
+ "_model_module_version": "1.5.0",
27
+ "_model_name": "VBoxModel",
28
+ "_view_count": null,
29
+ "_view_module": "@jupyter-widgets/controls",
30
+ "_view_module_version": "1.5.0",
31
+ "_view_name": "VBoxView",
32
+ "box_style": "",
33
+ "children": [
34
+ "IPY_MODEL_8d44cf16b34d40bd8702560ae149192c",
35
+ "IPY_MODEL_13af4957747d40bbb9501bc13c3c160b",
36
+ "IPY_MODEL_1be0b602c74f4a7182bd041aeba58112",
37
+ "IPY_MODEL_af225dc2d79649c2ac0e853c517a0482",
38
+ "IPY_MODEL_f019d8eee73f4b7b8312029f03040c45"
39
+ ],
40
+ "layout": "IPY_MODEL_d1deb908be244cfb8dd253b3cc081510"
41
+ }
42
+ },
43
+ "8d44cf16b34d40bd8702560ae149192c": {
44
+ "model_module": "@jupyter-widgets/controls",
45
+ "model_name": "HTMLModel",
46
+ "model_module_version": "1.5.0",
47
+ "state": {
48
+ "_dom_classes": [],
49
+ "_model_module": "@jupyter-widgets/controls",
50
+ "_model_module_version": "1.5.0",
51
+ "_model_name": "HTMLModel",
52
+ "_view_count": null,
53
+ "_view_module": "@jupyter-widgets/controls",
54
+ "_view_module_version": "1.5.0",
55
+ "_view_name": "HTMLView",
56
+ "description": "",
57
+ "description_tooltip": null,
58
+ "layout": "IPY_MODEL_06e659f7bd9149e784a5cb68efbad736",
59
+ "placeholder": "​",
60
+ "style": "IPY_MODEL_ba1f1326e12d437a9fd76faa226eec44",
61
+ "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
62
+ }
63
+ },
64
+ "13af4957747d40bbb9501bc13c3c160b": {
65
+ "model_module": "@jupyter-widgets/controls",
66
+ "model_name": "PasswordModel",
67
+ "model_module_version": "1.5.0",
68
+ "state": {
69
+ "_dom_classes": [],
70
+ "_model_module": "@jupyter-widgets/controls",
71
+ "_model_module_version": "1.5.0",
72
+ "_model_name": "PasswordModel",
73
+ "_view_count": null,
74
+ "_view_module": "@jupyter-widgets/controls",
75
+ "_view_module_version": "1.5.0",
76
+ "_view_name": "PasswordView",
77
+ "continuous_update": true,
78
+ "description": "Token:",
79
+ "description_tooltip": null,
80
+ "disabled": false,
81
+ "layout": "IPY_MODEL_65ea346ce3174bb098344531db439eac",
82
+ "placeholder": "​",
83
+ "style": "IPY_MODEL_6ac7576be7204aeca73385c48e1c5d0a",
84
+ "value": ""
85
+ }
86
+ },
87
+ "1be0b602c74f4a7182bd041aeba58112": {
88
+ "model_module": "@jupyter-widgets/controls",
89
+ "model_name": "CheckboxModel",
90
+ "model_module_version": "1.5.0",
91
+ "state": {
92
+ "_dom_classes": [],
93
+ "_model_module": "@jupyter-widgets/controls",
94
+ "_model_module_version": "1.5.0",
95
+ "_model_name": "CheckboxModel",
96
+ "_view_count": null,
97
+ "_view_module": "@jupyter-widgets/controls",
98
+ "_view_module_version": "1.5.0",
99
+ "_view_name": "CheckboxView",
100
+ "description": "Add token as git credential?",
101
+ "description_tooltip": null,
102
+ "disabled": false,
103
+ "indent": true,
104
+ "layout": "IPY_MODEL_7fd3c4de83234008a72d9541c29ce765",
105
+ "style": "IPY_MODEL_b81585732ca14c89ae1b5bb25735cd62",
106
+ "value": true
107
+ }
108
+ },
109
+ "af225dc2d79649c2ac0e853c517a0482": {
110
+ "model_module": "@jupyter-widgets/controls",
111
+ "model_name": "ButtonModel",
112
+ "model_module_version": "1.5.0",
113
+ "state": {
114
+ "_dom_classes": [],
115
+ "_model_module": "@jupyter-widgets/controls",
116
+ "_model_module_version": "1.5.0",
117
+ "_model_name": "ButtonModel",
118
+ "_view_count": null,
119
+ "_view_module": "@jupyter-widgets/controls",
120
+ "_view_module_version": "1.5.0",
121
+ "_view_name": "ButtonView",
122
+ "button_style": "",
123
+ "description": "Login",
124
+ "disabled": false,
125
+ "icon": "",
126
+ "layout": "IPY_MODEL_a68a4415305045c0a89c60349175cc82",
127
+ "style": "IPY_MODEL_7316ece4ad474193bcc5b5db55632561",
128
+ "tooltip": ""
129
+ }
130
+ },
131
+ "f019d8eee73f4b7b8312029f03040c45": {
132
+ "model_module": "@jupyter-widgets/controls",
133
+ "model_name": "HTMLModel",
134
+ "model_module_version": "1.5.0",
135
+ "state": {
136
+ "_dom_classes": [],
137
+ "_model_module": "@jupyter-widgets/controls",
138
+ "_model_module_version": "1.5.0",
139
+ "_model_name": "HTMLModel",
140
+ "_view_count": null,
141
+ "_view_module": "@jupyter-widgets/controls",
142
+ "_view_module_version": "1.5.0",
143
+ "_view_name": "HTMLView",
144
+ "description": "",
145
+ "description_tooltip": null,
146
+ "layout": "IPY_MODEL_c0bb9ace36d64b8296bd35d538bd4f7e",
147
+ "placeholder": "​",
148
+ "style": "IPY_MODEL_03ad39c22f644e55bcffa070e3832582",
149
+ "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
150
+ }
151
+ },
152
+ "d1deb908be244cfb8dd253b3cc081510": {
153
+ "model_module": "@jupyter-widgets/base",
154
+ "model_name": "LayoutModel",
155
+ "model_module_version": "1.2.0",
156
+ "state": {
157
+ "_model_module": "@jupyter-widgets/base",
158
+ "_model_module_version": "1.2.0",
159
+ "_model_name": "LayoutModel",
160
+ "_view_count": null,
161
+ "_view_module": "@jupyter-widgets/base",
162
+ "_view_module_version": "1.2.0",
163
+ "_view_name": "LayoutView",
164
+ "align_content": null,
165
+ "align_items": "center",
166
+ "align_self": null,
167
+ "border": null,
168
+ "bottom": null,
169
+ "display": "flex",
170
+ "flex": null,
171
+ "flex_flow": "column",
172
+ "grid_area": null,
173
+ "grid_auto_columns": null,
174
+ "grid_auto_flow": null,
175
+ "grid_auto_rows": null,
176
+ "grid_column": null,
177
+ "grid_gap": null,
178
+ "grid_row": null,
179
+ "grid_template_areas": null,
180
+ "grid_template_columns": null,
181
+ "grid_template_rows": null,
182
+ "height": null,
183
+ "justify_content": null,
184
+ "justify_items": null,
185
+ "left": null,
186
+ "margin": null,
187
+ "max_height": null,
188
+ "max_width": null,
189
+ "min_height": null,
190
+ "min_width": null,
191
+ "object_fit": null,
192
+ "object_position": null,
193
+ "order": null,
194
+ "overflow": null,
195
+ "overflow_x": null,
196
+ "overflow_y": null,
197
+ "padding": null,
198
+ "right": null,
199
+ "top": null,
200
+ "visibility": null,
201
+ "width": "50%"
202
+ }
203
+ },
204
+ "06e659f7bd9149e784a5cb68efbad736": {
205
+ "model_module": "@jupyter-widgets/base",
206
+ "model_name": "LayoutModel",
207
+ "model_module_version": "1.2.0",
208
+ "state": {
209
+ "_model_module": "@jupyter-widgets/base",
210
+ "_model_module_version": "1.2.0",
211
+ "_model_name": "LayoutModel",
212
+ "_view_count": null,
213
+ "_view_module": "@jupyter-widgets/base",
214
+ "_view_module_version": "1.2.0",
215
+ "_view_name": "LayoutView",
216
+ "align_content": null,
217
+ "align_items": null,
218
+ "align_self": null,
219
+ "border": null,
220
+ "bottom": null,
221
+ "display": null,
222
+ "flex": null,
223
+ "flex_flow": null,
224
+ "grid_area": null,
225
+ "grid_auto_columns": null,
226
+ "grid_auto_flow": null,
227
+ "grid_auto_rows": null,
228
+ "grid_column": null,
229
+ "grid_gap": null,
230
+ "grid_row": null,
231
+ "grid_template_areas": null,
232
+ "grid_template_columns": null,
233
+ "grid_template_rows": null,
234
+ "height": null,
235
+ "justify_content": null,
236
+ "justify_items": null,
237
+ "left": null,
238
+ "margin": null,
239
+ "max_height": null,
240
+ "max_width": null,
241
+ "min_height": null,
242
+ "min_width": null,
243
+ "object_fit": null,
244
+ "object_position": null,
245
+ "order": null,
246
+ "overflow": null,
247
+ "overflow_x": null,
248
+ "overflow_y": null,
249
+ "padding": null,
250
+ "right": null,
251
+ "top": null,
252
+ "visibility": null,
253
+ "width": null
254
+ }
255
+ },
256
+ "ba1f1326e12d437a9fd76faa226eec44": {
257
+ "model_module": "@jupyter-widgets/controls",
258
+ "model_name": "DescriptionStyleModel",
259
+ "model_module_version": "1.5.0",
260
+ "state": {
261
+ "_model_module": "@jupyter-widgets/controls",
262
+ "_model_module_version": "1.5.0",
263
+ "_model_name": "DescriptionStyleModel",
264
+ "_view_count": null,
265
+ "_view_module": "@jupyter-widgets/base",
266
+ "_view_module_version": "1.2.0",
267
+ "_view_name": "StyleView",
268
+ "description_width": ""
269
+ }
270
+ },
271
+ "65ea346ce3174bb098344531db439eac": {
272
+ "model_module": "@jupyter-widgets/base",
273
+ "model_name": "LayoutModel",
274
+ "model_module_version": "1.2.0",
275
+ "state": {
276
+ "_model_module": "@jupyter-widgets/base",
277
+ "_model_module_version": "1.2.0",
278
+ "_model_name": "LayoutModel",
279
+ "_view_count": null,
280
+ "_view_module": "@jupyter-widgets/base",
281
+ "_view_module_version": "1.2.0",
282
+ "_view_name": "LayoutView",
283
+ "align_content": null,
284
+ "align_items": null,
285
+ "align_self": null,
286
+ "border": null,
287
+ "bottom": null,
288
+ "display": null,
289
+ "flex": null,
290
+ "flex_flow": null,
291
+ "grid_area": null,
292
+ "grid_auto_columns": null,
293
+ "grid_auto_flow": null,
294
+ "grid_auto_rows": null,
295
+ "grid_column": null,
296
+ "grid_gap": null,
297
+ "grid_row": null,
298
+ "grid_template_areas": null,
299
+ "grid_template_columns": null,
300
+ "grid_template_rows": null,
301
+ "height": null,
302
+ "justify_content": null,
303
+ "justify_items": null,
304
+ "left": null,
305
+ "margin": null,
306
+ "max_height": null,
307
+ "max_width": null,
308
+ "min_height": null,
309
+ "min_width": null,
310
+ "object_fit": null,
311
+ "object_position": null,
312
+ "order": null,
313
+ "overflow": null,
314
+ "overflow_x": null,
315
+ "overflow_y": null,
316
+ "padding": null,
317
+ "right": null,
318
+ "top": null,
319
+ "visibility": null,
320
+ "width": null
321
+ }
322
+ },
323
+ "6ac7576be7204aeca73385c48e1c5d0a": {
324
+ "model_module": "@jupyter-widgets/controls",
325
+ "model_name": "DescriptionStyleModel",
326
+ "model_module_version": "1.5.0",
327
+ "state": {
328
+ "_model_module": "@jupyter-widgets/controls",
329
+ "_model_module_version": "1.5.0",
330
+ "_model_name": "DescriptionStyleModel",
331
+ "_view_count": null,
332
+ "_view_module": "@jupyter-widgets/base",
333
+ "_view_module_version": "1.2.0",
334
+ "_view_name": "StyleView",
335
+ "description_width": ""
336
+ }
337
+ },
338
+ "7fd3c4de83234008a72d9541c29ce765": {
339
+ "model_module": "@jupyter-widgets/base",
340
+ "model_name": "LayoutModel",
341
+ "model_module_version": "1.2.0",
342
+ "state": {
343
+ "_model_module": "@jupyter-widgets/base",
344
+ "_model_module_version": "1.2.0",
345
+ "_model_name": "LayoutModel",
346
+ "_view_count": null,
347
+ "_view_module": "@jupyter-widgets/base",
348
+ "_view_module_version": "1.2.0",
349
+ "_view_name": "LayoutView",
350
+ "align_content": null,
351
+ "align_items": null,
352
+ "align_self": null,
353
+ "border": null,
354
+ "bottom": null,
355
+ "display": null,
356
+ "flex": null,
357
+ "flex_flow": null,
358
+ "grid_area": null,
359
+ "grid_auto_columns": null,
360
+ "grid_auto_flow": null,
361
+ "grid_auto_rows": null,
362
+ "grid_column": null,
363
+ "grid_gap": null,
364
+ "grid_row": null,
365
+ "grid_template_areas": null,
366
+ "grid_template_columns": null,
367
+ "grid_template_rows": null,
368
+ "height": null,
369
+ "justify_content": null,
370
+ "justify_items": null,
371
+ "left": null,
372
+ "margin": null,
373
+ "max_height": null,
374
+ "max_width": null,
375
+ "min_height": null,
376
+ "min_width": null,
377
+ "object_fit": null,
378
+ "object_position": null,
379
+ "order": null,
380
+ "overflow": null,
381
+ "overflow_x": null,
382
+ "overflow_y": null,
383
+ "padding": null,
384
+ "right": null,
385
+ "top": null,
386
+ "visibility": null,
387
+ "width": null
388
+ }
389
+ },
390
+ "b81585732ca14c89ae1b5bb25735cd62": {
391
+ "model_module": "@jupyter-widgets/controls",
392
+ "model_name": "DescriptionStyleModel",
393
+ "model_module_version": "1.5.0",
394
+ "state": {
395
+ "_model_module": "@jupyter-widgets/controls",
396
+ "_model_module_version": "1.5.0",
397
+ "_model_name": "DescriptionStyleModel",
398
+ "_view_count": null,
399
+ "_view_module": "@jupyter-widgets/base",
400
+ "_view_module_version": "1.2.0",
401
+ "_view_name": "StyleView",
402
+ "description_width": ""
403
+ }
404
+ },
405
+ "a68a4415305045c0a89c60349175cc82": {
406
+ "model_module": "@jupyter-widgets/base",
407
+ "model_name": "LayoutModel",
408
+ "model_module_version": "1.2.0",
409
+ "state": {
410
+ "_model_module": "@jupyter-widgets/base",
411
+ "_model_module_version": "1.2.0",
412
+ "_model_name": "LayoutModel",
413
+ "_view_count": null,
414
+ "_view_module": "@jupyter-widgets/base",
415
+ "_view_module_version": "1.2.0",
416
+ "_view_name": "LayoutView",
417
+ "align_content": null,
418
+ "align_items": null,
419
+ "align_self": null,
420
+ "border": null,
421
+ "bottom": null,
422
+ "display": null,
423
+ "flex": null,
424
+ "flex_flow": null,
425
+ "grid_area": null,
426
+ "grid_auto_columns": null,
427
+ "grid_auto_flow": null,
428
+ "grid_auto_rows": null,
429
+ "grid_column": null,
430
+ "grid_gap": null,
431
+ "grid_row": null,
432
+ "grid_template_areas": null,
433
+ "grid_template_columns": null,
434
+ "grid_template_rows": null,
435
+ "height": null,
436
+ "justify_content": null,
437
+ "justify_items": null,
438
+ "left": null,
439
+ "margin": null,
440
+ "max_height": null,
441
+ "max_width": null,
442
+ "min_height": null,
443
+ "min_width": null,
444
+ "object_fit": null,
445
+ "object_position": null,
446
+ "order": null,
447
+ "overflow": null,
448
+ "overflow_x": null,
449
+ "overflow_y": null,
450
+ "padding": null,
451
+ "right": null,
452
+ "top": null,
453
+ "visibility": null,
454
+ "width": null
455
+ }
456
+ },
457
+ "7316ece4ad474193bcc5b5db55632561": {
458
+ "model_module": "@jupyter-widgets/controls",
459
+ "model_name": "ButtonStyleModel",
460
+ "model_module_version": "1.5.0",
461
+ "state": {
462
+ "_model_module": "@jupyter-widgets/controls",
463
+ "_model_module_version": "1.5.0",
464
+ "_model_name": "ButtonStyleModel",
465
+ "_view_count": null,
466
+ "_view_module": "@jupyter-widgets/base",
467
+ "_view_module_version": "1.2.0",
468
+ "_view_name": "StyleView",
469
+ "button_color": null,
470
+ "font_weight": ""
471
+ }
472
+ },
473
+ "c0bb9ace36d64b8296bd35d538bd4f7e": {
474
+ "model_module": "@jupyter-widgets/base",
475
+ "model_name": "LayoutModel",
476
+ "model_module_version": "1.2.0",
477
+ "state": {
478
+ "_model_module": "@jupyter-widgets/base",
479
+ "_model_module_version": "1.2.0",
480
+ "_model_name": "LayoutModel",
481
+ "_view_count": null,
482
+ "_view_module": "@jupyter-widgets/base",
483
+ "_view_module_version": "1.2.0",
484
+ "_view_name": "LayoutView",
485
+ "align_content": null,
486
+ "align_items": null,
487
+ "align_self": null,
488
+ "border": null,
489
+ "bottom": null,
490
+ "display": null,
491
+ "flex": null,
492
+ "flex_flow": null,
493
+ "grid_area": null,
494
+ "grid_auto_columns": null,
495
+ "grid_auto_flow": null,
496
+ "grid_auto_rows": null,
497
+ "grid_column": null,
498
+ "grid_gap": null,
499
+ "grid_row": null,
500
+ "grid_template_areas": null,
501
+ "grid_template_columns": null,
502
+ "grid_template_rows": null,
503
+ "height": null,
504
+ "justify_content": null,
505
+ "justify_items": null,
506
+ "left": null,
507
+ "margin": null,
508
+ "max_height": null,
509
+ "max_width": null,
510
+ "min_height": null,
511
+ "min_width": null,
512
+ "object_fit": null,
513
+ "object_position": null,
514
+ "order": null,
515
+ "overflow": null,
516
+ "overflow_x": null,
517
+ "overflow_y": null,
518
+ "padding": null,
519
+ "right": null,
520
+ "top": null,
521
+ "visibility": null,
522
+ "width": null
523
+ }
524
+ },
525
+ "03ad39c22f644e55bcffa070e3832582": {
526
+ "model_module": "@jupyter-widgets/controls",
527
+ "model_name": "DescriptionStyleModel",
528
+ "model_module_version": "1.5.0",
529
+ "state": {
530
+ "_model_module": "@jupyter-widgets/controls",
531
+ "_model_module_version": "1.5.0",
532
+ "_model_name": "DescriptionStyleModel",
533
+ "_view_count": null,
534
+ "_view_module": "@jupyter-widgets/base",
535
+ "_view_module_version": "1.2.0",
536
+ "_view_name": "StyleView",
537
+ "description_width": ""
538
+ }
539
+ }
540
+ }
541
+ }
542
+ },
543
+ "cells": [
544
+ {
545
+ "cell_type": "markdown",
546
+ "source": [
547
+ "# 🧠 GPT-2 124M Fine-tuned on Wikitext\n",
548
+ "\n",
549
+ "This is an experiment aimed at gaining a practical understanding of 🤗 Transformers and 🤗 Datasets. It follows the guide outlined in [Hugging Face Notebooks](https://github.com/huggingface/notebooks/blob/main/examples/language_modeling_from_scratch.ipynb)."
550
+ ],
551
+ "metadata": {
552
+ "id": "krQK4P9tWs_F"
553
+ }
554
+ },
555
+ {
556
+ "cell_type": "code",
557
+ "execution_count": 1,
558
+ "metadata": {
559
+ "colab": {
560
+ "base_uri": "https://localhost:8080/"
561
+ },
562
+ "id": "9-IVR9eMWY8v",
563
+ "outputId": "a2ebf62e-55b2-488e-95c8-b1ee10026308"
564
+ },
565
+ "outputs": [
566
+ {
567
+ "output_type": "stream",
568
+ "name": "stdout",
569
+ "text": [
570
+ "Requirement already satisfied: datasets in /usr/local/lib/python3.10/dist-packages (2.15.0)\n",
571
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.35.2)\n",
572
+ "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.25.0)\n",
573
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n",
574
+ "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n",
575
+ "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets) (0.6)\n",
576
+ "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.3.7)\n",
577
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n",
578
+ "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n",
579
+ "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n",
580
+ "Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets) (3.4.1)\n",
581
+ "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets) (0.70.15)\n",
582
+ "Requirement already satisfied: fsspec[http]<=2023.10.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n",
583
+ "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.9.1)\n",
584
+ "Requirement already satisfied: huggingface-hub>=0.18.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (0.19.4)\n",
585
+ "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.2)\n",
586
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n",
587
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n",
588
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n",
589
+ "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.0)\n",
590
+ "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.1)\n",
591
+ "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n",
592
+ "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.1.0+cu118)\n",
593
+ "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n",
594
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n",
595
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.3)\n",
596
+ "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n",
597
+ "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n",
598
+ "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n",
599
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.18.0->datasets) (4.5.0)\n",
600
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.3.2)\n",
601
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.6)\n",
602
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.7)\n",
603
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.11.17)\n",
604
+ "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n",
605
+ "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.2.1)\n",
606
+ "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n",
607
+ "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.1.0)\n",
608
+ "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n",
609
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3.post1)\n",
610
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n",
611
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n",
612
+ "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n"
613
+ ]
614
+ }
615
+ ],
616
+ "source": [
617
+ "# Install necessary packages\n",
618
+ "!pip install datasets transformers accelerate"
619
+ ]
620
+ },
621
+ {
622
+ "cell_type": "code",
623
+ "source": [
624
+ "# Authenticate with HuggingFace Hub\n",
625
+ "from huggingface_hub import notebook_login\n",
626
+ "\n",
627
+ "notebook_login()"
628
+ ],
629
+ "metadata": {
630
+ "colab": {
631
+ "base_uri": "https://localhost:8080/",
632
+ "height": 331,
633
+ "referenced_widgets": [
634
+ "72d3f33c56a14b01bef05ea2ae98e72f",
635
+ "8d44cf16b34d40bd8702560ae149192c",
636
+ "13af4957747d40bbb9501bc13c3c160b",
637
+ "1be0b602c74f4a7182bd041aeba58112",
638
+ "af225dc2d79649c2ac0e853c517a0482",
639
+ "f019d8eee73f4b7b8312029f03040c45",
640
+ "d1deb908be244cfb8dd253b3cc081510",
641
+ "06e659f7bd9149e784a5cb68efbad736",
642
+ "ba1f1326e12d437a9fd76faa226eec44",
643
+ "65ea346ce3174bb098344531db439eac",
644
+ "6ac7576be7204aeca73385c48e1c5d0a",
645
+ "7fd3c4de83234008a72d9541c29ce765",
646
+ "b81585732ca14c89ae1b5bb25735cd62",
647
+ "a68a4415305045c0a89c60349175cc82",
648
+ "7316ece4ad474193bcc5b5db55632561",
649
+ "c0bb9ace36d64b8296bd35d538bd4f7e",
650
+ "03ad39c22f644e55bcffa070e3832582"
651
+ ]
652
+ },
653
+ "id": "lpuAz9I-XsQQ",
654
+ "outputId": "bf420c42-7ff0-4a4c-da57-420691eaec5d"
655
+ },
656
+ "execution_count": 2,
657
+ "outputs": [
658
+ {
659
+ "output_type": "display_data",
660
+ "data": {
661
+ "text/plain": [
662
+ "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
663
+ ],
664
+ "application/vnd.jupyter.widget-view+json": {
665
+ "version_major": 2,
666
+ "version_minor": 0,
667
+ "model_id": "72d3f33c56a14b01bef05ea2ae98e72f"
668
+ }
669
+ },
670
+ "metadata": {}
671
+ }
672
+ ]
673
+ },
674
+ {
675
+ "cell_type": "code",
676
+ "source": [
677
+ "!apt install git-lfs"
678
+ ],
679
+ "metadata": {
680
+ "colab": {
681
+ "base_uri": "https://localhost:8080/"
682
+ },
683
+ "id": "ka0fqf1AYD2N",
684
+ "outputId": "8c1b6396-cd72-45fd-9d1f-4fd9c76b0966"
685
+ },
686
+ "execution_count": 3,
687
+ "outputs": [
688
+ {
689
+ "output_type": "stream",
690
+ "name": "stdout",
691
+ "text": [
692
+ "Reading package lists... Done\n",
693
+ "Building dependency tree... Done\n",
694
+ "Reading state information... Done\n",
695
+ "git-lfs is already the newest version (3.0.2-1ubuntu0.2).\n",
696
+ "0 upgraded, 0 newly installed, 0 to remove and 15 not upgraded.\n"
697
+ ]
698
+ }
699
+ ]
700
+ },
701
+ {
702
+ "cell_type": "code",
703
+ "source": [
704
+ "import transformers\n",
705
+ "\n",
706
+ "print(transformers.__version__)"
707
+ ],
708
+ "metadata": {
709
+ "colab": {
710
+ "base_uri": "https://localhost:8080/"
711
+ },
712
+ "id": "evGbNxr8YIA1",
713
+ "outputId": "08025e50-d542-4f92-8c33-505cd2b44802"
714
+ },
715
+ "execution_count": 4,
716
+ "outputs": [
717
+ {
718
+ "output_type": "stream",
719
+ "name": "stdout",
720
+ "text": [
721
+ "4.35.2\n"
722
+ ]
723
+ }
724
+ ]
725
+ },
726
+ {
727
+ "cell_type": "code",
728
+ "source": [
729
+ "# Load dataset\n",
730
+ "from datasets import load_dataset\n",
731
+ "\n",
732
+ "datasets = load_dataset('wikitext', 'wikitext-103-raw-v1')"
733
+ ],
734
+ "metadata": {
735
+ "id": "cdNnCQv4YMbT"
736
+ },
737
+ "execution_count": 5,
738
+ "outputs": []
739
+ },
740
+ {
741
+ "cell_type": "code",
742
+ "source": [
743
+ "from transformers import AutoTokenizer\n",
744
+ "\n",
745
+ "model=\"gpt2\"\n",
746
+ "\n",
747
+ "tokenizer = AutoTokenizer.from_pretrained(model)\n",
748
+ "\n",
749
+ "def tokenize_function(examples):\n",
750
+ " return tokenizer(examples[\"text\"])\n",
751
+ "\n",
752
+ "tokenized_datasets = datasets.map(tokenize_function, batched=True, num_proc=4, remove_columns=[\"text\"])"
753
+ ],
754
+ "metadata": {
755
+ "id": "RtYDKWCBY76H"
756
+ },
757
+ "execution_count": 6,
758
+ "outputs": []
759
+ },
760
+ {
761
+ "cell_type": "code",
762
+ "source": [
763
+ "# block_size = tokenizer.model_max_length\n",
764
+ "block_size = 256\n",
765
+ "\n",
766
+ "# Function to group texts\n",
767
+ "def group_texts(examples):\n",
768
+ " # Concatenate all texts.\n",
769
+ " concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n",
770
+ " total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
771
+ " # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can\n",
772
+ " # customize this part to your needs.\n",
773
+ " total_length = (total_length // block_size) * block_size\n",
774
+ " # Split by chunks of max_len.\n",
775
+ " result = {\n",
776
+ " k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
777
+ " for k, t in concatenated_examples.items()\n",
778
+ " }\n",
779
+ " result[\"labels\"] = result[\"input_ids\"].copy()\n",
780
+ " return result\n",
781
+ "\n",
782
+ "# Transform from tokenized dataset to lm dataset\n",
783
+ "lm_datasets = tokenized_datasets.map(\n",
784
+ " group_texts,\n",
785
+ " batched=True,\n",
786
+ " batch_size=1000,\n",
787
+ " num_proc=4,\n",
788
+ ")\n",
789
+ "\n",
790
+ "# Print a chunk of the dataset\n",
791
+ "print(tokenizer.decode(lm_datasets[\"train\"][1][\"input_ids\"]))"
792
+ ],
793
+ "metadata": {
794
+ "colab": {
795
+ "base_uri": "https://localhost:8080/"
796
+ },
797
+ "id": "ar5R7hnYZyUm",
798
+ "outputId": "ceb702a8-8e60-4850-dfa7-ce111c21d875"
799
+ },
800
+ "execution_count": 7,
801
+ "outputs": [
802
+ {
803
+ "output_type": "stream",
804
+ "name": "stdout",
805
+ "text": [
806
+ "awa. A large team of writers handled the script. The game's opening theme was sung by May 'n. \n",
807
+ " It met with positive sales in Japan, and was praised by both Japanese and western critics. After release, it received downloadable content, along with an expanded edition in November of that year. It was also adapted into manga and an original video animation series. Due to low sales of Valkyria Chronicles II, Valkyria Chronicles III was not localized, but a fan translation compatible with the game's expanded edition was released in 2014. Media.Vision would return to the franchise with the development of Valkyria : Azure Revolution for the PlayStation 4. \n",
808
+ " = = Gameplay = = \n",
809
+ " As with previous Valkyira Chronicles games, Valkyria Chronicles III is a tactical role @-@ playing game where players take control of a military unit and take part in missions against enemy forces. Stories are told through comic book @-@ like panels with animated character portraits, with characters speaking partially through voiced speech bubbles and partially through unvoiced text. The player progresses through a series of linear missions, gradually unlocked as maps that can be freely scanned through and replayed as they are unlocked. The route to each story location on the map varies\n"
810
+ ]
811
+ }
812
+ ]
813
+ },
814
+ {
815
+ "cell_type": "code",
816
+ "source": [
817
+ "# Load GPT2 model\n",
818
+ "from transformers import AutoModelForCausalLM, Trainer, TrainingArguments\n",
819
+ "import torch\n",
820
+ "\n",
821
+ "language_model = AutoModelForCausalLM.from_pretrained(model)\n",
822
+ "language_model.train()\n",
823
+ "\n",
824
+ "training_args = TrainingArguments(\n",
825
+ " f\"GPT2-124M-wikitext-v0.1\",\n",
826
+ " evaluation_strategy = \"epoch\",\n",
827
+ " num_train_epochs=5,\n",
828
+ " learning_rate=2e-5,\n",
829
+ " weight_decay=0.01,\n",
830
+ " push_to_hub=True\n",
831
+ ")"
832
+ ],
833
+ "metadata": {
834
+ "id": "RZhL1hiibVNK"
835
+ },
836
+ "execution_count": 8,
837
+ "outputs": []
838
+ },
839
+ {
840
+ "cell_type": "code",
841
+ "source": [
842
+ "# Clear CUDA cache\n",
843
+ "torch.cuda.empty_cache()\n",
844
+ "\n",
845
+ "trainer = Trainer(\n",
846
+ " model=language_model,\n",
847
+ " args=training_args,\n",
848
+ " train_dataset=lm_datasets[\"train\"],\n",
849
+ " eval_dataset=lm_datasets[\"validation\"],\n",
850
+ ")\n",
851
+ "\n",
852
+ "trainer.train()"
853
+ ],
854
+ "metadata": {
855
+ "colab": {
856
+ "base_uri": "https://localhost:8080/",
857
+ "height": 75
858
+ },
859
+ "id": "EvanxM9wfl6_",
860
+ "outputId": "194b1923-f8bd-40c8-b62a-a6ae136df8be"
861
+ },
862
+ "execution_count": null,
863
+ "outputs": [
864
+ {
865
+ "output_type": "display_data",
866
+ "data": {
867
+ "text/plain": [
868
+ "<IPython.core.display.HTML object>"
869
+ ],
870
+ "text/html": [
871
+ "\n",
872
+ " <div>\n",
873
+ " \n",
874
+ " <progress value='2797' max='287335' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
875
+ " [ 2797/287335 28:43 < 48:43:53, 1.62 it/s, Epoch 0.05/5]\n",
876
+ " </div>\n",
877
+ " <table border=\"1\" class=\"dataframe\">\n",
878
+ " <thead>\n",
879
+ " <tr style=\"text-align: left;\">\n",
880
+ " <th>Epoch</th>\n",
881
+ " <th>Training Loss</th>\n",
882
+ " <th>Validation Loss</th>\n",
883
+ " </tr>\n",
884
+ " </thead>\n",
885
+ " <tbody>\n",
886
+ " </tbody>\n",
887
+ "</table><p>"
888
+ ]
889
+ },
890
+ "metadata": {}
891
+ }
892
+ ]
893
+ },
894
+ {
895
+ "cell_type": "code",
896
+ "source": [
897
+ "import math\n",
898
+ "\n",
899
+ "eval_results = trainer.evaluate()\n",
900
+ "print(f\"Perplexity: {math.exp(eval_results['eval_loss']):.2f}\")\n",
901
+ "\n",
902
+ "trainer.push_to_hub()"
903
+ ],
904
+ "metadata": {
905
+ "id": "77GdAgpkiAOm"
906
+ },
907
+ "execution_count": null,
908
+ "outputs": []
909
+ }
910
+ ]
911
+ }