codeShare commited on
Commit
88933bd
·
verified ·
1 Parent(s): f198c44

Upload sd_token_similarity_calculator.ipynb

Browse files
Files changed (1) hide show
  1. sd_token_similarity_calculator.ipynb +1228 -175
sd_token_similarity_calculator.ipynb CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  "widgets": {
16
  "application/vnd.jupyter.widget-state+json": {
17
- "41caa8e5f4a14315bcf166f523b211f2": {
18
  "model_module": "@jupyter-widgets/controls",
19
  "model_name": "HBoxModel",
20
  "model_module_version": "1.5.0",
@@ -29,14 +29,14 @@
29
  "_view_name": "HBoxView",
30
  "box_style": "",
31
  "children": [
32
- "IPY_MODEL_3c332e856f4b4ae28f66f62eef1b70f3",
33
- "IPY_MODEL_91698f067b08434fb8fdc5ffe95556f0",
34
- "IPY_MODEL_260ccc18db5f4237824bf5f7b9d82f86"
35
  ],
36
- "layout": "IPY_MODEL_ca413af11195435c87c5ca5bcf584e20"
37
  }
38
  },
39
- "3c332e856f4b4ae28f66f62eef1b70f3": {
40
  "model_module": "@jupyter-widgets/controls",
41
  "model_name": "HTMLModel",
42
  "model_module_version": "1.5.0",
@@ -51,13 +51,13 @@
51
  "_view_name": "HTMLView",
52
  "description": "",
53
  "description_tooltip": null,
54
- "layout": "IPY_MODEL_fed70b803afd4765b07dfa06ad78c1e4",
55
  "placeholder": "​",
56
- "style": "IPY_MODEL_22c696eaecb541858b5e443073d4d5dc",
57
- "value": "preprocessor_config.json: 100%"
58
  }
59
  },
60
- "91698f067b08434fb8fdc5ffe95556f0": {
61
  "model_module": "@jupyter-widgets/controls",
62
  "model_name": "FloatProgressModel",
63
  "model_module_version": "1.5.0",
@@ -73,15 +73,15 @@
73
  "bar_style": "success",
74
  "description": "",
75
  "description_tooltip": null,
76
- "layout": "IPY_MODEL_c88fd9b677174ab39d9c123fcb2e4835",
77
- "max": 316,
78
  "min": 0,
79
  "orientation": "horizontal",
80
- "style": "IPY_MODEL_1755a7d4eb974639a5036829a0d9c105",
81
- "value": 316
82
  }
83
  },
84
- "260ccc18db5f4237824bf5f7b9d82f86": {
85
  "model_module": "@jupyter-widgets/controls",
86
  "model_name": "HTMLModel",
87
  "model_module_version": "1.5.0",
@@ -96,13 +96,13 @@
96
  "_view_name": "HTMLView",
97
  "description": "",
98
  "description_tooltip": null,
99
- "layout": "IPY_MODEL_d8f11fa03e7d4a1c93b24d29a799a762",
100
  "placeholder": "​",
101
- "style": "IPY_MODEL_36c842ffee2a4e8c8a7a9f3bf40c2408",
102
- "value": " 316/316 [00:00<00:00, 13.6kB/s]"
103
  }
104
  },
105
- "ca413af11195435c87c5ca5bcf584e20": {
106
  "model_module": "@jupyter-widgets/base",
107
  "model_name": "LayoutModel",
108
  "model_module_version": "1.2.0",
@@ -154,7 +154,7 @@
154
  "width": null
155
  }
156
  },
157
- "fed70b803afd4765b07dfa06ad78c1e4": {
158
  "model_module": "@jupyter-widgets/base",
159
  "model_name": "LayoutModel",
160
  "model_module_version": "1.2.0",
@@ -206,7 +206,7 @@
206
  "width": null
207
  }
208
  },
209
- "22c696eaecb541858b5e443073d4d5dc": {
210
  "model_module": "@jupyter-widgets/controls",
211
  "model_name": "DescriptionStyleModel",
212
  "model_module_version": "1.5.0",
@@ -221,7 +221,7 @@
221
  "description_width": ""
222
  }
223
  },
224
- "c88fd9b677174ab39d9c123fcb2e4835": {
225
  "model_module": "@jupyter-widgets/base",
226
  "model_name": "LayoutModel",
227
  "model_module_version": "1.2.0",
@@ -273,7 +273,7 @@
273
  "width": null
274
  }
275
  },
276
- "1755a7d4eb974639a5036829a0d9c105": {
277
  "model_module": "@jupyter-widgets/controls",
278
  "model_name": "ProgressStyleModel",
279
  "model_module_version": "1.5.0",
@@ -289,7 +289,7 @@
289
  "description_width": ""
290
  }
291
  },
292
- "d8f11fa03e7d4a1c93b24d29a799a762": {
293
  "model_module": "@jupyter-widgets/base",
294
  "model_name": "LayoutModel",
295
  "model_module_version": "1.2.0",
@@ -341,7 +341,7 @@
341
  "width": null
342
  }
343
  },
344
- "36c842ffee2a4e8c8a7a9f3bf40c2408": {
345
  "model_module": "@jupyter-widgets/controls",
346
  "model_name": "DescriptionStyleModel",
347
  "model_module_version": "1.5.0",
@@ -356,7 +356,7 @@
356
  "description_width": ""
357
  }
358
  },
359
- "585ad0a1c1cb43fdae92f59cc988d9d3": {
360
  "model_module": "@jupyter-widgets/controls",
361
  "model_name": "HBoxModel",
362
  "model_module_version": "1.5.0",
@@ -371,14 +371,14 @@
371
  "_view_name": "HBoxView",
372
  "box_style": "",
373
  "children": [
374
- "IPY_MODEL_4be1e52f00374023b9336c8455883830",
375
- "IPY_MODEL_fa35e472a2134522ad14f7ef5184d684",
376
- "IPY_MODEL_a34c8c99b1b1485c947484286bb7efc2"
377
  ],
378
- "layout": "IPY_MODEL_96300f7e29e04a60a8a76c93894b9396"
379
  }
380
  },
381
- "4be1e52f00374023b9336c8455883830": {
382
  "model_module": "@jupyter-widgets/controls",
383
  "model_name": "HTMLModel",
384
  "model_module_version": "1.5.0",
@@ -393,13 +393,13 @@
393
  "_view_name": "HTMLView",
394
  "description": "",
395
  "description_tooltip": null,
396
- "layout": "IPY_MODEL_6751a1816b444fa09b23aba3db5cfd0e",
397
  "placeholder": "​",
398
- "style": "IPY_MODEL_5f5a9841f3284c94b28668bd3b20024a",
399
- "value": "config.json: 100%"
400
  }
401
  },
402
- "fa35e472a2134522ad14f7ef5184d684": {
403
  "model_module": "@jupyter-widgets/controls",
404
  "model_name": "FloatProgressModel",
405
  "model_module_version": "1.5.0",
@@ -415,15 +415,15 @@
415
  "bar_style": "success",
416
  "description": "",
417
  "description_tooltip": null,
418
- "layout": "IPY_MODEL_31c3dbb150db4de9b4b745c55e6ae501",
419
- "max": 4519,
420
  "min": 0,
421
  "orientation": "horizontal",
422
- "style": "IPY_MODEL_a3d37f8f8bfe44f5a2106658629800c4",
423
- "value": 4519
424
  }
425
  },
426
- "a34c8c99b1b1485c947484286bb7efc2": {
427
  "model_module": "@jupyter-widgets/controls",
428
  "model_name": "HTMLModel",
429
  "model_module_version": "1.5.0",
@@ -438,13 +438,13 @@
438
  "_view_name": "HTMLView",
439
  "description": "",
440
  "description_tooltip": null,
441
- "layout": "IPY_MODEL_6b141c814285449cab9d58b3f28a31ca",
442
  "placeholder": "​",
443
- "style": "IPY_MODEL_8dbe0b9597f5405495a933f0d9751cb3",
444
- "value": " 4.52k/4.52k [00:00<00:00, 148kB/s]"
445
  }
446
  },
447
- "96300f7e29e04a60a8a76c93894b9396": {
448
  "model_module": "@jupyter-widgets/base",
449
  "model_name": "LayoutModel",
450
  "model_module_version": "1.2.0",
@@ -496,7 +496,7 @@
496
  "width": null
497
  }
498
  },
499
- "6751a1816b444fa09b23aba3db5cfd0e": {
500
  "model_module": "@jupyter-widgets/base",
501
  "model_name": "LayoutModel",
502
  "model_module_version": "1.2.0",
@@ -548,7 +548,7 @@
548
  "width": null
549
  }
550
  },
551
- "5f5a9841f3284c94b28668bd3b20024a": {
552
  "model_module": "@jupyter-widgets/controls",
553
  "model_name": "DescriptionStyleModel",
554
  "model_module_version": "1.5.0",
@@ -563,7 +563,7 @@
563
  "description_width": ""
564
  }
565
  },
566
- "31c3dbb150db4de9b4b745c55e6ae501": {
567
  "model_module": "@jupyter-widgets/base",
568
  "model_name": "LayoutModel",
569
  "model_module_version": "1.2.0",
@@ -615,7 +615,7 @@
615
  "width": null
616
  }
617
  },
618
- "a3d37f8f8bfe44f5a2106658629800c4": {
619
  "model_module": "@jupyter-widgets/controls",
620
  "model_name": "ProgressStyleModel",
621
  "model_module_version": "1.5.0",
@@ -631,7 +631,7 @@
631
  "description_width": ""
632
  }
633
  },
634
- "6b141c814285449cab9d58b3f28a31ca": {
635
  "model_module": "@jupyter-widgets/base",
636
  "model_name": "LayoutModel",
637
  "model_module_version": "1.2.0",
@@ -683,7 +683,7 @@
683
  "width": null
684
  }
685
  },
686
- "8dbe0b9597f5405495a933f0d9751cb3": {
687
  "model_module": "@jupyter-widgets/controls",
688
  "model_name": "DescriptionStyleModel",
689
  "model_module_version": "1.5.0",
@@ -698,7 +698,7 @@
698
  "description_width": ""
699
  }
700
  },
701
- "e77b98bdad2f4bfc822b3164a4b224c2": {
702
  "model_module": "@jupyter-widgets/controls",
703
  "model_name": "HBoxModel",
704
  "model_module_version": "1.5.0",
@@ -713,14 +713,14 @@
713
  "_view_name": "HBoxView",
714
  "box_style": "",
715
  "children": [
716
- "IPY_MODEL_2417ede3d19b4070bd94a4fdd8ca355f",
717
- "IPY_MODEL_e6af15ec16c1465c80eb633d8e5397a6",
718
- "IPY_MODEL_22d9d19be87b414da07ad3f996c8bf3f"
719
  ],
720
- "layout": "IPY_MODEL_fa717d810ac34c8cb51acd3e3780ee24"
721
  }
722
  },
723
- "2417ede3d19b4070bd94a4fdd8ca355f": {
724
  "model_module": "@jupyter-widgets/controls",
725
  "model_name": "HTMLModel",
726
  "model_module_version": "1.5.0",
@@ -735,13 +735,13 @@
735
  "_view_name": "HTMLView",
736
  "description": "",
737
  "description_tooltip": null,
738
- "layout": "IPY_MODEL_ebe0823ba52247a6a9129167d525136d",
739
  "placeholder": "​",
740
- "style": "IPY_MODEL_49deab7883f643148fafb9a48a346471",
741
- "value": "model.safetensors: 100%"
742
  }
743
  },
744
- "e6af15ec16c1465c80eb633d8e5397a6": {
745
  "model_module": "@jupyter-widgets/controls",
746
  "model_name": "FloatProgressModel",
747
  "model_module_version": "1.5.0",
@@ -757,15 +757,15 @@
757
  "bar_style": "success",
758
  "description": "",
759
  "description_tooltip": null,
760
- "layout": "IPY_MODEL_e002b3ed2e644385bcc9651558e435b4",
761
- "max": 1710540580,
762
  "min": 0,
763
  "orientation": "horizontal",
764
- "style": "IPY_MODEL_fd822812dea04a80b420b324ae7e482a",
765
- "value": 1710540580
766
  }
767
  },
768
- "22d9d19be87b414da07ad3f996c8bf3f": {
769
  "model_module": "@jupyter-widgets/controls",
770
  "model_name": "HTMLModel",
771
  "model_module_version": "1.5.0",
@@ -780,13 +780,13 @@
780
  "_view_name": "HTMLView",
781
  "description": "",
782
  "description_tooltip": null,
783
- "layout": "IPY_MODEL_6622dddcbc2e433dbc1e6c2a50c3e7f8",
784
  "placeholder": "​",
785
- "style": "IPY_MODEL_adad59feeffb46f083b06def7e8e418e",
786
- "value": " 1.71G/1.71G [00:19<00:00, 128MB/s]"
787
  }
788
  },
789
- "fa717d810ac34c8cb51acd3e3780ee24": {
790
  "model_module": "@jupyter-widgets/base",
791
  "model_name": "LayoutModel",
792
  "model_module_version": "1.2.0",
@@ -838,7 +838,7 @@
838
  "width": null
839
  }
840
  },
841
- "ebe0823ba52247a6a9129167d525136d": {
842
  "model_module": "@jupyter-widgets/base",
843
  "model_name": "LayoutModel",
844
  "model_module_version": "1.2.0",
@@ -890,7 +890,7 @@
890
  "width": null
891
  }
892
  },
893
- "49deab7883f643148fafb9a48a346471": {
894
  "model_module": "@jupyter-widgets/controls",
895
  "model_name": "DescriptionStyleModel",
896
  "model_module_version": "1.5.0",
@@ -905,7 +905,7 @@
905
  "description_width": ""
906
  }
907
  },
908
- "e002b3ed2e644385bcc9651558e435b4": {
909
  "model_module": "@jupyter-widgets/base",
910
  "model_name": "LayoutModel",
911
  "model_module_version": "1.2.0",
@@ -957,7 +957,7 @@
957
  "width": null
958
  }
959
  },
960
- "fd822812dea04a80b420b324ae7e482a": {
961
  "model_module": "@jupyter-widgets/controls",
962
  "model_name": "ProgressStyleModel",
963
  "model_module_version": "1.5.0",
@@ -973,7 +973,7 @@
973
  "description_width": ""
974
  }
975
  },
976
- "6622dddcbc2e433dbc1e6c2a50c3e7f8": {
977
  "model_module": "@jupyter-widgets/base",
978
  "model_name": "LayoutModel",
979
  "model_module_version": "1.2.0",
@@ -1025,7 +1025,691 @@
1025
  "width": null
1026
  }
1027
  },
1028
- "adad59feeffb46f083b06def7e8e418e": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
  "model_module": "@jupyter-widgets/controls",
1030
  "model_name": "DescriptionStyleModel",
1031
  "model_module_version": "1.5.0",
@@ -1321,10 +2005,470 @@
1321
  "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
1322
  ],
1323
  "metadata": {
1324
- "id": "iWeFnT1gAx6A"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1325
  },
1326
- "execution_count": null,
1327
- "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1328
  },
1329
  {
1330
  "cell_type": "markdown",
@@ -1385,7 +2529,7 @@
1385
  ],
1386
  "metadata": {
1387
  "id": "ke6mZ1RZDOeB",
1388
- "outputId": "2e7296f8-2f71-4462-e2fe-16b89596f260",
1389
  "colab": {
1390
  "base_uri": "https://localhost:8080/",
1391
  "height": 1000
@@ -1413,7 +2557,7 @@
1413
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
1414
  "from transformers import CLIPProcessor, CLIPModel\n",
1415
  "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
1416
- "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\", weights_only=True)\n",
1417
  "\n",
1418
  "# Get image features\n",
1419
  "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
@@ -1450,101 +2594,10 @@
1450
  "d.close() #close the file"
1451
  ],
1452
  "metadata": {
1453
- "id": "gaOB8rsOneIa",
1454
- "outputId": "09ecffa7-2aa2-46d7-a7b1-62254c7914b0",
1455
- "colab": {
1456
- "base_uri": "https://localhost:8080/",
1457
- "height": 168,
1458
- "referenced_widgets": [
1459
- "41caa8e5f4a14315bcf166f523b211f2",
1460
- "3c332e856f4b4ae28f66f62eef1b70f3",
1461
- "91698f067b08434fb8fdc5ffe95556f0",
1462
- "260ccc18db5f4237824bf5f7b9d82f86",
1463
- "ca413af11195435c87c5ca5bcf584e20",
1464
- "fed70b803afd4765b07dfa06ad78c1e4",
1465
- "22c696eaecb541858b5e443073d4d5dc",
1466
- "c88fd9b677174ab39d9c123fcb2e4835",
1467
- "1755a7d4eb974639a5036829a0d9c105",
1468
- "d8f11fa03e7d4a1c93b24d29a799a762",
1469
- "36c842ffee2a4e8c8a7a9f3bf40c2408",
1470
- "585ad0a1c1cb43fdae92f59cc988d9d3",
1471
- "4be1e52f00374023b9336c8455883830",
1472
- "fa35e472a2134522ad14f7ef5184d684",
1473
- "a34c8c99b1b1485c947484286bb7efc2",
1474
- "96300f7e29e04a60a8a76c93894b9396",
1475
- "6751a1816b444fa09b23aba3db5cfd0e",
1476
- "5f5a9841f3284c94b28668bd3b20024a",
1477
- "31c3dbb150db4de9b4b745c55e6ae501",
1478
- "a3d37f8f8bfe44f5a2106658629800c4",
1479
- "6b141c814285449cab9d58b3f28a31ca",
1480
- "8dbe0b9597f5405495a933f0d9751cb3",
1481
- "e77b98bdad2f4bfc822b3164a4b224c2",
1482
- "2417ede3d19b4070bd94a4fdd8ca355f",
1483
- "e6af15ec16c1465c80eb633d8e5397a6",
1484
- "22d9d19be87b414da07ad3f996c8bf3f",
1485
- "fa717d810ac34c8cb51acd3e3780ee24",
1486
- "ebe0823ba52247a6a9129167d525136d",
1487
- "49deab7883f643148fafb9a48a346471",
1488
- "e002b3ed2e644385bcc9651558e435b4",
1489
- "fd822812dea04a80b420b324ae7e482a",
1490
- "6622dddcbc2e433dbc1e6c2a50c3e7f8",
1491
- "adad59feeffb46f083b06def7e8e418e"
1492
- ]
1493
- }
1494
  },
1495
- "execution_count": 4,
1496
- "outputs": [
1497
- {
1498
- "output_type": "display_data",
1499
- "data": {
1500
- "text/plain": [
1501
- "preprocessor_config.json: 0%| | 0.00/316 [00:00<?, ?B/s]"
1502
- ],
1503
- "application/vnd.jupyter.widget-view+json": {
1504
- "version_major": 2,
1505
- "version_minor": 0,
1506
- "model_id": "41caa8e5f4a14315bcf166f523b211f2"
1507
- }
1508
- },
1509
- "metadata": {}
1510
- },
1511
- {
1512
- "output_type": "display_data",
1513
- "data": {
1514
- "text/plain": [
1515
- "config.json: 0%| | 0.00/4.52k [00:00<?, ?B/s]"
1516
- ],
1517
- "application/vnd.jupyter.widget-view+json": {
1518
- "version_major": 2,
1519
- "version_minor": 0,
1520
- "model_id": "585ad0a1c1cb43fdae92f59cc988d9d3"
1521
- }
1522
- },
1523
- "metadata": {}
1524
- },
1525
- {
1526
- "output_type": "display_data",
1527
- "data": {
1528
- "text/plain": [
1529
- "model.safetensors: 0%| | 0.00/1.71G [00:00<?, ?B/s]"
1530
- ],
1531
- "application/vnd.jupyter.widget-view+json": {
1532
- "version_major": 2,
1533
- "version_minor": 0,
1534
- "model_id": "e77b98bdad2f4bfc822b3164a4b224c2"
1535
- }
1536
- },
1537
- "metadata": {}
1538
- },
1539
- {
1540
- "output_type": "stream",
1541
- "name": "stderr",
1542
- "text": [
1543
- "/usr/local/lib/python3.10/dist-packages/torch/storage.py:414: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
1544
- " return torch.load(io.BytesIO(b))\n"
1545
- ]
1546
- }
1547
- ]
1548
  },
1549
  {
1550
  "cell_type": "code",
@@ -1575,7 +2628,7 @@
1575
  ],
1576
  "metadata": {
1577
  "id": "eZqMUhP0qYaK",
1578
- "outputId": "a66dd8a3-0192-4175-d08e-d0ad6914f6d1",
1579
  "colab": {
1580
  "base_uri": "https://localhost:8080/"
1581
  }
 
14
  },
15
  "widgets": {
16
  "application/vnd.jupyter.widget-state+json": {
17
+ "a9ef4e8a75744de8aa5415b1db4eea10": {
18
  "model_module": "@jupyter-widgets/controls",
19
  "model_name": "HBoxModel",
20
  "model_module_version": "1.5.0",
 
29
  "_view_name": "HBoxView",
30
  "box_style": "",
31
  "children": [
32
+ "IPY_MODEL_7b8d395589344f3f924f9e155c42f2e2",
33
+ "IPY_MODEL_3e86dfa658bb4c158e39acd9d0621cf7",
34
+ "IPY_MODEL_ed3598441ca645c8b288ad212540e7a6"
35
  ],
36
+ "layout": "IPY_MODEL_837d9220ce274f99bcdcd96696d005f2"
37
  }
38
  },
39
+ "7b8d395589344f3f924f9e155c42f2e2": {
40
  "model_module": "@jupyter-widgets/controls",
41
  "model_name": "HTMLModel",
42
  "model_module_version": "1.5.0",
 
51
  "_view_name": "HTMLView",
52
  "description": "",
53
  "description_tooltip": null,
54
+ "layout": "IPY_MODEL_f74d31f204e54bf486d54fd0f1836c6d",
55
  "placeholder": "​",
56
+ "style": "IPY_MODEL_b541cd8491ea4ef3b5e898a708e8c7c4",
57
+ "value": "tokenizer_config.json: 100%"
58
  }
59
  },
60
+ "3e86dfa658bb4c158e39acd9d0621cf7": {
61
  "model_module": "@jupyter-widgets/controls",
62
  "model_name": "FloatProgressModel",
63
  "model_module_version": "1.5.0",
 
73
  "bar_style": "success",
74
  "description": "",
75
  "description_tooltip": null,
76
+ "layout": "IPY_MODEL_eeab01019d2e4c15997a89ac6b63e0e3",
77
+ "max": 905,
78
  "min": 0,
79
  "orientation": "horizontal",
80
+ "style": "IPY_MODEL_d9a9de3142c647d7817de26ae1b494bc",
81
+ "value": 905
82
  }
83
  },
84
+ "ed3598441ca645c8b288ad212540e7a6": {
85
  "model_module": "@jupyter-widgets/controls",
86
  "model_name": "HTMLModel",
87
  "model_module_version": "1.5.0",
 
96
  "_view_name": "HTMLView",
97
  "description": "",
98
  "description_tooltip": null,
99
+ "layout": "IPY_MODEL_ce3af20e0ed04b94a272037a0505b628",
100
  "placeholder": "​",
101
+ "style": "IPY_MODEL_a38f43c0676c470aa92085295c86a2f5",
102
+ "value": " 905/905 [00:00&lt;00:00, 14.0kB/s]"
103
  }
104
  },
105
+ "837d9220ce274f99bcdcd96696d005f2": {
106
  "model_module": "@jupyter-widgets/base",
107
  "model_name": "LayoutModel",
108
  "model_module_version": "1.2.0",
 
154
  "width": null
155
  }
156
  },
157
+ "f74d31f204e54bf486d54fd0f1836c6d": {
158
  "model_module": "@jupyter-widgets/base",
159
  "model_name": "LayoutModel",
160
  "model_module_version": "1.2.0",
 
206
  "width": null
207
  }
208
  },
209
+ "b541cd8491ea4ef3b5e898a708e8c7c4": {
210
  "model_module": "@jupyter-widgets/controls",
211
  "model_name": "DescriptionStyleModel",
212
  "model_module_version": "1.5.0",
 
221
  "description_width": ""
222
  }
223
  },
224
+ "eeab01019d2e4c15997a89ac6b63e0e3": {
225
  "model_module": "@jupyter-widgets/base",
226
  "model_name": "LayoutModel",
227
  "model_module_version": "1.2.0",
 
273
  "width": null
274
  }
275
  },
276
+ "d9a9de3142c647d7817de26ae1b494bc": {
277
  "model_module": "@jupyter-widgets/controls",
278
  "model_name": "ProgressStyleModel",
279
  "model_module_version": "1.5.0",
 
289
  "description_width": ""
290
  }
291
  },
292
+ "ce3af20e0ed04b94a272037a0505b628": {
293
  "model_module": "@jupyter-widgets/base",
294
  "model_name": "LayoutModel",
295
  "model_module_version": "1.2.0",
 
341
  "width": null
342
  }
343
  },
344
+ "a38f43c0676c470aa92085295c86a2f5": {
345
  "model_module": "@jupyter-widgets/controls",
346
  "model_name": "DescriptionStyleModel",
347
  "model_module_version": "1.5.0",
 
356
  "description_width": ""
357
  }
358
  },
359
+ "5c3ba6ab7d454729bfc51fe4b424016b": {
360
  "model_module": "@jupyter-widgets/controls",
361
  "model_name": "HBoxModel",
362
  "model_module_version": "1.5.0",
 
371
  "_view_name": "HBoxView",
372
  "box_style": "",
373
  "children": [
374
+ "IPY_MODEL_35c1ab379ab84803801ecb8f0ca02cc2",
375
+ "IPY_MODEL_36d4abf6f1c6490089d11f08aafffbd3",
376
+ "IPY_MODEL_4b46fd734f8c4d2da3578c8ed9ae65f9"
377
  ],
378
+ "layout": "IPY_MODEL_15ae48732fa24f4992f878a8bdbd270c"
379
  }
380
  },
381
+ "35c1ab379ab84803801ecb8f0ca02cc2": {
382
  "model_module": "@jupyter-widgets/controls",
383
  "model_name": "HTMLModel",
384
  "model_module_version": "1.5.0",
 
393
  "_view_name": "HTMLView",
394
  "description": "",
395
  "description_tooltip": null,
396
+ "layout": "IPY_MODEL_ed5d90f26adf4cbbbc55b4ae9bcb5dfd",
397
  "placeholder": "​",
398
+ "style": "IPY_MODEL_753d4170097343619be3bc9f8a1db292",
399
+ "value": "vocab.json: 100%"
400
  }
401
  },
402
+ "36d4abf6f1c6490089d11f08aafffbd3": {
403
  "model_module": "@jupyter-widgets/controls",
404
  "model_name": "FloatProgressModel",
405
  "model_module_version": "1.5.0",
 
415
  "bar_style": "success",
416
  "description": "",
417
  "description_tooltip": null,
418
+ "layout": "IPY_MODEL_b163e9677bcc49179d9a39dd7ba7b15c",
419
+ "max": 961143,
420
  "min": 0,
421
  "orientation": "horizontal",
422
+ "style": "IPY_MODEL_6d8b715d7fac49358e852dfd356d6bfa",
423
+ "value": 961143
424
  }
425
  },
426
+ "4b46fd734f8c4d2da3578c8ed9ae65f9": {
427
  "model_module": "@jupyter-widgets/controls",
428
  "model_name": "HTMLModel",
429
  "model_module_version": "1.5.0",
 
438
  "_view_name": "HTMLView",
439
  "description": "",
440
  "description_tooltip": null,
441
+ "layout": "IPY_MODEL_d6456001eee242838e9543d99632af20",
442
  "placeholder": "​",
443
+ "style": "IPY_MODEL_739b6630e62e4aae9666d20070d4094f",
444
+ "value": " 961k/961k [00:00&lt;00:00, 4.64MB/s]"
445
  }
446
  },
447
+ "15ae48732fa24f4992f878a8bdbd270c": {
448
  "model_module": "@jupyter-widgets/base",
449
  "model_name": "LayoutModel",
450
  "model_module_version": "1.2.0",
 
496
  "width": null
497
  }
498
  },
499
+ "ed5d90f26adf4cbbbc55b4ae9bcb5dfd": {
500
  "model_module": "@jupyter-widgets/base",
501
  "model_name": "LayoutModel",
502
  "model_module_version": "1.2.0",
 
548
  "width": null
549
  }
550
  },
551
+ "753d4170097343619be3bc9f8a1db292": {
552
  "model_module": "@jupyter-widgets/controls",
553
  "model_name": "DescriptionStyleModel",
554
  "model_module_version": "1.5.0",
 
563
  "description_width": ""
564
  }
565
  },
566
+ "b163e9677bcc49179d9a39dd7ba7b15c": {
567
  "model_module": "@jupyter-widgets/base",
568
  "model_name": "LayoutModel",
569
  "model_module_version": "1.2.0",
 
615
  "width": null
616
  }
617
  },
618
+ "6d8b715d7fac49358e852dfd356d6bfa": {
619
  "model_module": "@jupyter-widgets/controls",
620
  "model_name": "ProgressStyleModel",
621
  "model_module_version": "1.5.0",
 
631
  "description_width": ""
632
  }
633
  },
634
+ "d6456001eee242838e9543d99632af20": {
635
  "model_module": "@jupyter-widgets/base",
636
  "model_name": "LayoutModel",
637
  "model_module_version": "1.2.0",
 
683
  "width": null
684
  }
685
  },
686
+ "739b6630e62e4aae9666d20070d4094f": {
687
  "model_module": "@jupyter-widgets/controls",
688
  "model_name": "DescriptionStyleModel",
689
  "model_module_version": "1.5.0",
 
698
  "description_width": ""
699
  }
700
  },
701
+ "a8b80d53ae86498b83f1d95eb73ed79b": {
702
  "model_module": "@jupyter-widgets/controls",
703
  "model_name": "HBoxModel",
704
  "model_module_version": "1.5.0",
 
713
  "_view_name": "HBoxView",
714
  "box_style": "",
715
  "children": [
716
+ "IPY_MODEL_45a31ad2067447c4a1f692462a1a6328",
717
+ "IPY_MODEL_84f0d7a8123b4ff6bff07ddee9597cb7",
718
+ "IPY_MODEL_fb79ccb21a3a48a29998a3ad9b453473"
719
  ],
720
+ "layout": "IPY_MODEL_c5e1aacbaada46419a3b080d9fb7dcbc"
721
  }
722
  },
723
+ "45a31ad2067447c4a1f692462a1a6328": {
724
  "model_module": "@jupyter-widgets/controls",
725
  "model_name": "HTMLModel",
726
  "model_module_version": "1.5.0",
 
735
  "_view_name": "HTMLView",
736
  "description": "",
737
  "description_tooltip": null,
738
+ "layout": "IPY_MODEL_8136a9cbf6224f56b7313d9b1f0ee24a",
739
  "placeholder": "​",
740
+ "style": "IPY_MODEL_2392b9a8970f4e38b29a791a72330f2f",
741
+ "value": "merges.txt: 100%"
742
  }
743
  },
744
+ "84f0d7a8123b4ff6bff07ddee9597cb7": {
745
  "model_module": "@jupyter-widgets/controls",
746
  "model_name": "FloatProgressModel",
747
  "model_module_version": "1.5.0",
 
757
  "bar_style": "success",
758
  "description": "",
759
  "description_tooltip": null,
760
+ "layout": "IPY_MODEL_9eb6f1d3a5f5403dbce0e1ad7be66c84",
761
+ "max": 524619,
762
  "min": 0,
763
  "orientation": "horizontal",
764
+ "style": "IPY_MODEL_f0b0e5463ef5431dafe2407134700890",
765
+ "value": 524619
766
  }
767
  },
768
+ "fb79ccb21a3a48a29998a3ad9b453473": {
769
  "model_module": "@jupyter-widgets/controls",
770
  "model_name": "HTMLModel",
771
  "model_module_version": "1.5.0",
 
780
  "_view_name": "HTMLView",
781
  "description": "",
782
  "description_tooltip": null,
783
+ "layout": "IPY_MODEL_e0596c2620a84746bdfbd7b1970c3fec",
784
  "placeholder": "​",
785
+ "style": "IPY_MODEL_5fe581987ed040d6b0edb8cfa35c83d3",
786
+ "value": " 525k/525k [00:00&lt;00:00, 10.9MB/s]"
787
  }
788
  },
789
+ "c5e1aacbaada46419a3b080d9fb7dcbc": {
790
  "model_module": "@jupyter-widgets/base",
791
  "model_name": "LayoutModel",
792
  "model_module_version": "1.2.0",
 
838
  "width": null
839
  }
840
  },
841
+ "8136a9cbf6224f56b7313d9b1f0ee24a": {
842
  "model_module": "@jupyter-widgets/base",
843
  "model_name": "LayoutModel",
844
  "model_module_version": "1.2.0",
 
890
  "width": null
891
  }
892
  },
893
+ "2392b9a8970f4e38b29a791a72330f2f": {
894
  "model_module": "@jupyter-widgets/controls",
895
  "model_name": "DescriptionStyleModel",
896
  "model_module_version": "1.5.0",
 
905
  "description_width": ""
906
  }
907
  },
908
+ "9eb6f1d3a5f5403dbce0e1ad7be66c84": {
909
  "model_module": "@jupyter-widgets/base",
910
  "model_name": "LayoutModel",
911
  "model_module_version": "1.2.0",
 
957
  "width": null
958
  }
959
  },
960
+ "f0b0e5463ef5431dafe2407134700890": {
961
  "model_module": "@jupyter-widgets/controls",
962
  "model_name": "ProgressStyleModel",
963
  "model_module_version": "1.5.0",
 
973
  "description_width": ""
974
  }
975
  },
976
+ "e0596c2620a84746bdfbd7b1970c3fec": {
977
  "model_module": "@jupyter-widgets/base",
978
  "model_name": "LayoutModel",
979
  "model_module_version": "1.2.0",
 
1025
  "width": null
1026
  }
1027
  },
1028
+ "5fe581987ed040d6b0edb8cfa35c83d3": {
1029
+ "model_module": "@jupyter-widgets/controls",
1030
+ "model_name": "DescriptionStyleModel",
1031
+ "model_module_version": "1.5.0",
1032
+ "state": {
1033
+ "_model_module": "@jupyter-widgets/controls",
1034
+ "_model_module_version": "1.5.0",
1035
+ "_model_name": "DescriptionStyleModel",
1036
+ "_view_count": null,
1037
+ "_view_module": "@jupyter-widgets/base",
1038
+ "_view_module_version": "1.2.0",
1039
+ "_view_name": "StyleView",
1040
+ "description_width": ""
1041
+ }
1042
+ },
1043
+ "d713cb1329124a55829e196919bbeebe": {
1044
+ "model_module": "@jupyter-widgets/controls",
1045
+ "model_name": "HBoxModel",
1046
+ "model_module_version": "1.5.0",
1047
+ "state": {
1048
+ "_dom_classes": [],
1049
+ "_model_module": "@jupyter-widgets/controls",
1050
+ "_model_module_version": "1.5.0",
1051
+ "_model_name": "HBoxModel",
1052
+ "_view_count": null,
1053
+ "_view_module": "@jupyter-widgets/controls",
1054
+ "_view_module_version": "1.5.0",
1055
+ "_view_name": "HBoxView",
1056
+ "box_style": "",
1057
+ "children": [
1058
+ "IPY_MODEL_2834ce621fc64c14a025f49a5a5c0156",
1059
+ "IPY_MODEL_670e6e2290644c9dac59ee5bfff368cf",
1060
+ "IPY_MODEL_a1e23afe9a624b5b8abc5fe0cf576e41"
1061
+ ],
1062
+ "layout": "IPY_MODEL_02e4857a5f674b6fbf528c5ebf0c75d8"
1063
+ }
1064
+ },
1065
+ "2834ce621fc64c14a025f49a5a5c0156": {
1066
+ "model_module": "@jupyter-widgets/controls",
1067
+ "model_name": "HTMLModel",
1068
+ "model_module_version": "1.5.0",
1069
+ "state": {
1070
+ "_dom_classes": [],
1071
+ "_model_module": "@jupyter-widgets/controls",
1072
+ "_model_module_version": "1.5.0",
1073
+ "_model_name": "HTMLModel",
1074
+ "_view_count": null,
1075
+ "_view_module": "@jupyter-widgets/controls",
1076
+ "_view_module_version": "1.5.0",
1077
+ "_view_name": "HTMLView",
1078
+ "description": "",
1079
+ "description_tooltip": null,
1080
+ "layout": "IPY_MODEL_74b454dff7b94965b43c5d3fb166a784",
1081
+ "placeholder": "​",
1082
+ "style": "IPY_MODEL_35c1bd03e909414ba339f584ad1d632c",
1083
+ "value": "tokenizer.json: 100%"
1084
+ }
1085
+ },
1086
+ "670e6e2290644c9dac59ee5bfff368cf": {
1087
+ "model_module": "@jupyter-widgets/controls",
1088
+ "model_name": "FloatProgressModel",
1089
+ "model_module_version": "1.5.0",
1090
+ "state": {
1091
+ "_dom_classes": [],
1092
+ "_model_module": "@jupyter-widgets/controls",
1093
+ "_model_module_version": "1.5.0",
1094
+ "_model_name": "FloatProgressModel",
1095
+ "_view_count": null,
1096
+ "_view_module": "@jupyter-widgets/controls",
1097
+ "_view_module_version": "1.5.0",
1098
+ "_view_name": "ProgressView",
1099
+ "bar_style": "success",
1100
+ "description": "",
1101
+ "description_tooltip": null,
1102
+ "layout": "IPY_MODEL_e7ba6190c8564c86a1168567cceddfd7",
1103
+ "max": 2224003,
1104
+ "min": 0,
1105
+ "orientation": "horizontal",
1106
+ "style": "IPY_MODEL_fc1849474a4348579edc9699470a4a10",
1107
+ "value": 2224003
1108
+ }
1109
+ },
1110
+ "a1e23afe9a624b5b8abc5fe0cf576e41": {
1111
+ "model_module": "@jupyter-widgets/controls",
1112
+ "model_name": "HTMLModel",
1113
+ "model_module_version": "1.5.0",
1114
+ "state": {
1115
+ "_dom_classes": [],
1116
+ "_model_module": "@jupyter-widgets/controls",
1117
+ "_model_module_version": "1.5.0",
1118
+ "_model_name": "HTMLModel",
1119
+ "_view_count": null,
1120
+ "_view_module": "@jupyter-widgets/controls",
1121
+ "_view_module_version": "1.5.0",
1122
+ "_view_name": "HTMLView",
1123
+ "description": "",
1124
+ "description_tooltip": null,
1125
+ "layout": "IPY_MODEL_4a35817c51e24b399d8014cc9e66e723",
1126
+ "placeholder": "​",
1127
+ "style": "IPY_MODEL_cbf1975e54c544ba93ef76f7d0430551",
1128
+ "value": " 2.22M/2.22M [00:00&lt;00:00, 14.0MB/s]"
1129
+ }
1130
+ },
1131
+ "02e4857a5f674b6fbf528c5ebf0c75d8": {
1132
+ "model_module": "@jupyter-widgets/base",
1133
+ "model_name": "LayoutModel",
1134
+ "model_module_version": "1.2.0",
1135
+ "state": {
1136
+ "_model_module": "@jupyter-widgets/base",
1137
+ "_model_module_version": "1.2.0",
1138
+ "_model_name": "LayoutModel",
1139
+ "_view_count": null,
1140
+ "_view_module": "@jupyter-widgets/base",
1141
+ "_view_module_version": "1.2.0",
1142
+ "_view_name": "LayoutView",
1143
+ "align_content": null,
1144
+ "align_items": null,
1145
+ "align_self": null,
1146
+ "border": null,
1147
+ "bottom": null,
1148
+ "display": null,
1149
+ "flex": null,
1150
+ "flex_flow": null,
1151
+ "grid_area": null,
1152
+ "grid_auto_columns": null,
1153
+ "grid_auto_flow": null,
1154
+ "grid_auto_rows": null,
1155
+ "grid_column": null,
1156
+ "grid_gap": null,
1157
+ "grid_row": null,
1158
+ "grid_template_areas": null,
1159
+ "grid_template_columns": null,
1160
+ "grid_template_rows": null,
1161
+ "height": null,
1162
+ "justify_content": null,
1163
+ "justify_items": null,
1164
+ "left": null,
1165
+ "margin": null,
1166
+ "max_height": null,
1167
+ "max_width": null,
1168
+ "min_height": null,
1169
+ "min_width": null,
1170
+ "object_fit": null,
1171
+ "object_position": null,
1172
+ "order": null,
1173
+ "overflow": null,
1174
+ "overflow_x": null,
1175
+ "overflow_y": null,
1176
+ "padding": null,
1177
+ "right": null,
1178
+ "top": null,
1179
+ "visibility": null,
1180
+ "width": null
1181
+ }
1182
+ },
1183
+ "74b454dff7b94965b43c5d3fb166a784": {
1184
+ "model_module": "@jupyter-widgets/base",
1185
+ "model_name": "LayoutModel",
1186
+ "model_module_version": "1.2.0",
1187
+ "state": {
1188
+ "_model_module": "@jupyter-widgets/base",
1189
+ "_model_module_version": "1.2.0",
1190
+ "_model_name": "LayoutModel",
1191
+ "_view_count": null,
1192
+ "_view_module": "@jupyter-widgets/base",
1193
+ "_view_module_version": "1.2.0",
1194
+ "_view_name": "LayoutView",
1195
+ "align_content": null,
1196
+ "align_items": null,
1197
+ "align_self": null,
1198
+ "border": null,
1199
+ "bottom": null,
1200
+ "display": null,
1201
+ "flex": null,
1202
+ "flex_flow": null,
1203
+ "grid_area": null,
1204
+ "grid_auto_columns": null,
1205
+ "grid_auto_flow": null,
1206
+ "grid_auto_rows": null,
1207
+ "grid_column": null,
1208
+ "grid_gap": null,
1209
+ "grid_row": null,
1210
+ "grid_template_areas": null,
1211
+ "grid_template_columns": null,
1212
+ "grid_template_rows": null,
1213
+ "height": null,
1214
+ "justify_content": null,
1215
+ "justify_items": null,
1216
+ "left": null,
1217
+ "margin": null,
1218
+ "max_height": null,
1219
+ "max_width": null,
1220
+ "min_height": null,
1221
+ "min_width": null,
1222
+ "object_fit": null,
1223
+ "object_position": null,
1224
+ "order": null,
1225
+ "overflow": null,
1226
+ "overflow_x": null,
1227
+ "overflow_y": null,
1228
+ "padding": null,
1229
+ "right": null,
1230
+ "top": null,
1231
+ "visibility": null,
1232
+ "width": null
1233
+ }
1234
+ },
1235
+ "35c1bd03e909414ba339f584ad1d632c": {
1236
+ "model_module": "@jupyter-widgets/controls",
1237
+ "model_name": "DescriptionStyleModel",
1238
+ "model_module_version": "1.5.0",
1239
+ "state": {
1240
+ "_model_module": "@jupyter-widgets/controls",
1241
+ "_model_module_version": "1.5.0",
1242
+ "_model_name": "DescriptionStyleModel",
1243
+ "_view_count": null,
1244
+ "_view_module": "@jupyter-widgets/base",
1245
+ "_view_module_version": "1.2.0",
1246
+ "_view_name": "StyleView",
1247
+ "description_width": ""
1248
+ }
1249
+ },
1250
+ "e7ba6190c8564c86a1168567cceddfd7": {
1251
+ "model_module": "@jupyter-widgets/base",
1252
+ "model_name": "LayoutModel",
1253
+ "model_module_version": "1.2.0",
1254
+ "state": {
1255
+ "_model_module": "@jupyter-widgets/base",
1256
+ "_model_module_version": "1.2.0",
1257
+ "_model_name": "LayoutModel",
1258
+ "_view_count": null,
1259
+ "_view_module": "@jupyter-widgets/base",
1260
+ "_view_module_version": "1.2.0",
1261
+ "_view_name": "LayoutView",
1262
+ "align_content": null,
1263
+ "align_items": null,
1264
+ "align_self": null,
1265
+ "border": null,
1266
+ "bottom": null,
1267
+ "display": null,
1268
+ "flex": null,
1269
+ "flex_flow": null,
1270
+ "grid_area": null,
1271
+ "grid_auto_columns": null,
1272
+ "grid_auto_flow": null,
1273
+ "grid_auto_rows": null,
1274
+ "grid_column": null,
1275
+ "grid_gap": null,
1276
+ "grid_row": null,
1277
+ "grid_template_areas": null,
1278
+ "grid_template_columns": null,
1279
+ "grid_template_rows": null,
1280
+ "height": null,
1281
+ "justify_content": null,
1282
+ "justify_items": null,
1283
+ "left": null,
1284
+ "margin": null,
1285
+ "max_height": null,
1286
+ "max_width": null,
1287
+ "min_height": null,
1288
+ "min_width": null,
1289
+ "object_fit": null,
1290
+ "object_position": null,
1291
+ "order": null,
1292
+ "overflow": null,
1293
+ "overflow_x": null,
1294
+ "overflow_y": null,
1295
+ "padding": null,
1296
+ "right": null,
1297
+ "top": null,
1298
+ "visibility": null,
1299
+ "width": null
1300
+ }
1301
+ },
1302
+ "fc1849474a4348579edc9699470a4a10": {
1303
+ "model_module": "@jupyter-widgets/controls",
1304
+ "model_name": "ProgressStyleModel",
1305
+ "model_module_version": "1.5.0",
1306
+ "state": {
1307
+ "_model_module": "@jupyter-widgets/controls",
1308
+ "_model_module_version": "1.5.0",
1309
+ "_model_name": "ProgressStyleModel",
1310
+ "_view_count": null,
1311
+ "_view_module": "@jupyter-widgets/base",
1312
+ "_view_module_version": "1.2.0",
1313
+ "_view_name": "StyleView",
1314
+ "bar_color": null,
1315
+ "description_width": ""
1316
+ }
1317
+ },
1318
+ "4a35817c51e24b399d8014cc9e66e723": {
1319
+ "model_module": "@jupyter-widgets/base",
1320
+ "model_name": "LayoutModel",
1321
+ "model_module_version": "1.2.0",
1322
+ "state": {
1323
+ "_model_module": "@jupyter-widgets/base",
1324
+ "_model_module_version": "1.2.0",
1325
+ "_model_name": "LayoutModel",
1326
+ "_view_count": null,
1327
+ "_view_module": "@jupyter-widgets/base",
1328
+ "_view_module_version": "1.2.0",
1329
+ "_view_name": "LayoutView",
1330
+ "align_content": null,
1331
+ "align_items": null,
1332
+ "align_self": null,
1333
+ "border": null,
1334
+ "bottom": null,
1335
+ "display": null,
1336
+ "flex": null,
1337
+ "flex_flow": null,
1338
+ "grid_area": null,
1339
+ "grid_auto_columns": null,
1340
+ "grid_auto_flow": null,
1341
+ "grid_auto_rows": null,
1342
+ "grid_column": null,
1343
+ "grid_gap": null,
1344
+ "grid_row": null,
1345
+ "grid_template_areas": null,
1346
+ "grid_template_columns": null,
1347
+ "grid_template_rows": null,
1348
+ "height": null,
1349
+ "justify_content": null,
1350
+ "justify_items": null,
1351
+ "left": null,
1352
+ "margin": null,
1353
+ "max_height": null,
1354
+ "max_width": null,
1355
+ "min_height": null,
1356
+ "min_width": null,
1357
+ "object_fit": null,
1358
+ "object_position": null,
1359
+ "order": null,
1360
+ "overflow": null,
1361
+ "overflow_x": null,
1362
+ "overflow_y": null,
1363
+ "padding": null,
1364
+ "right": null,
1365
+ "top": null,
1366
+ "visibility": null,
1367
+ "width": null
1368
+ }
1369
+ },
1370
+ "cbf1975e54c544ba93ef76f7d0430551": {
1371
+ "model_module": "@jupyter-widgets/controls",
1372
+ "model_name": "DescriptionStyleModel",
1373
+ "model_module_version": "1.5.0",
1374
+ "state": {
1375
+ "_model_module": "@jupyter-widgets/controls",
1376
+ "_model_module_version": "1.5.0",
1377
+ "_model_name": "DescriptionStyleModel",
1378
+ "_view_count": null,
1379
+ "_view_module": "@jupyter-widgets/base",
1380
+ "_view_module_version": "1.2.0",
1381
+ "_view_name": "StyleView",
1382
+ "description_width": ""
1383
+ }
1384
+ },
1385
+ "71024becb22f4758be2c790063347ac9": {
1386
+ "model_module": "@jupyter-widgets/controls",
1387
+ "model_name": "HBoxModel",
1388
+ "model_module_version": "1.5.0",
1389
+ "state": {
1390
+ "_dom_classes": [],
1391
+ "_model_module": "@jupyter-widgets/controls",
1392
+ "_model_module_version": "1.5.0",
1393
+ "_model_name": "HBoxModel",
1394
+ "_view_count": null,
1395
+ "_view_module": "@jupyter-widgets/controls",
1396
+ "_view_module_version": "1.5.0",
1397
+ "_view_name": "HBoxView",
1398
+ "box_style": "",
1399
+ "children": [
1400
+ "IPY_MODEL_637917dff73945c0820c5a71400c4145",
1401
+ "IPY_MODEL_31b1d3391e4742d6a822796df9d874f7",
1402
+ "IPY_MODEL_b048d1da65524b0d80ad445b36dd4ee2"
1403
+ ],
1404
+ "layout": "IPY_MODEL_20122c9d0c554702a557ab0b039cca77"
1405
+ }
1406
+ },
1407
+ "637917dff73945c0820c5a71400c4145": {
1408
+ "model_module": "@jupyter-widgets/controls",
1409
+ "model_name": "HTMLModel",
1410
+ "model_module_version": "1.5.0",
1411
+ "state": {
1412
+ "_dom_classes": [],
1413
+ "_model_module": "@jupyter-widgets/controls",
1414
+ "_model_module_version": "1.5.0",
1415
+ "_model_name": "HTMLModel",
1416
+ "_view_count": null,
1417
+ "_view_module": "@jupyter-widgets/controls",
1418
+ "_view_module_version": "1.5.0",
1419
+ "_view_name": "HTMLView",
1420
+ "description": "",
1421
+ "description_tooltip": null,
1422
+ "layout": "IPY_MODEL_5a4756d40ce74a6a8efa2a50b1942755",
1423
+ "placeholder": "​",
1424
+ "style": "IPY_MODEL_cc263ef7a2c74de98950fa285bf263a6",
1425
+ "value": "special_tokens_map.json: 100%"
1426
+ }
1427
+ },
1428
+ "31b1d3391e4742d6a822796df9d874f7": {
1429
+ "model_module": "@jupyter-widgets/controls",
1430
+ "model_name": "FloatProgressModel",
1431
+ "model_module_version": "1.5.0",
1432
+ "state": {
1433
+ "_dom_classes": [],
1434
+ "_model_module": "@jupyter-widgets/controls",
1435
+ "_model_module_version": "1.5.0",
1436
+ "_model_name": "FloatProgressModel",
1437
+ "_view_count": null,
1438
+ "_view_module": "@jupyter-widgets/controls",
1439
+ "_view_module_version": "1.5.0",
1440
+ "_view_name": "ProgressView",
1441
+ "bar_style": "success",
1442
+ "description": "",
1443
+ "description_tooltip": null,
1444
+ "layout": "IPY_MODEL_a7674b9679db40cfb302809d97cc0abf",
1445
+ "max": 389,
1446
+ "min": 0,
1447
+ "orientation": "horizontal",
1448
+ "style": "IPY_MODEL_4036173505fb4b528055f8ed423d7070",
1449
+ "value": 389
1450
+ }
1451
+ },
1452
+ "b048d1da65524b0d80ad445b36dd4ee2": {
1453
+ "model_module": "@jupyter-widgets/controls",
1454
+ "model_name": "HTMLModel",
1455
+ "model_module_version": "1.5.0",
1456
+ "state": {
1457
+ "_dom_classes": [],
1458
+ "_model_module": "@jupyter-widgets/controls",
1459
+ "_model_module_version": "1.5.0",
1460
+ "_model_name": "HTMLModel",
1461
+ "_view_count": null,
1462
+ "_view_module": "@jupyter-widgets/controls",
1463
+ "_view_module_version": "1.5.0",
1464
+ "_view_name": "HTMLView",
1465
+ "description": "",
1466
+ "description_tooltip": null,
1467
+ "layout": "IPY_MODEL_693fdd338bfc458caab25dd454508948",
1468
+ "placeholder": "​",
1469
+ "style": "IPY_MODEL_b90ea0357c6149a196eeb7a4d490fb9d",
1470
+ "value": " 389/389 [00:00&lt;00:00, 6.76kB/s]"
1471
+ }
1472
+ },
1473
+ "20122c9d0c554702a557ab0b039cca77": {
1474
+ "model_module": "@jupyter-widgets/base",
1475
+ "model_name": "LayoutModel",
1476
+ "model_module_version": "1.2.0",
1477
+ "state": {
1478
+ "_model_module": "@jupyter-widgets/base",
1479
+ "_model_module_version": "1.2.0",
1480
+ "_model_name": "LayoutModel",
1481
+ "_view_count": null,
1482
+ "_view_module": "@jupyter-widgets/base",
1483
+ "_view_module_version": "1.2.0",
1484
+ "_view_name": "LayoutView",
1485
+ "align_content": null,
1486
+ "align_items": null,
1487
+ "align_self": null,
1488
+ "border": null,
1489
+ "bottom": null,
1490
+ "display": null,
1491
+ "flex": null,
1492
+ "flex_flow": null,
1493
+ "grid_area": null,
1494
+ "grid_auto_columns": null,
1495
+ "grid_auto_flow": null,
1496
+ "grid_auto_rows": null,
1497
+ "grid_column": null,
1498
+ "grid_gap": null,
1499
+ "grid_row": null,
1500
+ "grid_template_areas": null,
1501
+ "grid_template_columns": null,
1502
+ "grid_template_rows": null,
1503
+ "height": null,
1504
+ "justify_content": null,
1505
+ "justify_items": null,
1506
+ "left": null,
1507
+ "margin": null,
1508
+ "max_height": null,
1509
+ "max_width": null,
1510
+ "min_height": null,
1511
+ "min_width": null,
1512
+ "object_fit": null,
1513
+ "object_position": null,
1514
+ "order": null,
1515
+ "overflow": null,
1516
+ "overflow_x": null,
1517
+ "overflow_y": null,
1518
+ "padding": null,
1519
+ "right": null,
1520
+ "top": null,
1521
+ "visibility": null,
1522
+ "width": null
1523
+ }
1524
+ },
1525
+ "5a4756d40ce74a6a8efa2a50b1942755": {
1526
+ "model_module": "@jupyter-widgets/base",
1527
+ "model_name": "LayoutModel",
1528
+ "model_module_version": "1.2.0",
1529
+ "state": {
1530
+ "_model_module": "@jupyter-widgets/base",
1531
+ "_model_module_version": "1.2.0",
1532
+ "_model_name": "LayoutModel",
1533
+ "_view_count": null,
1534
+ "_view_module": "@jupyter-widgets/base",
1535
+ "_view_module_version": "1.2.0",
1536
+ "_view_name": "LayoutView",
1537
+ "align_content": null,
1538
+ "align_items": null,
1539
+ "align_self": null,
1540
+ "border": null,
1541
+ "bottom": null,
1542
+ "display": null,
1543
+ "flex": null,
1544
+ "flex_flow": null,
1545
+ "grid_area": null,
1546
+ "grid_auto_columns": null,
1547
+ "grid_auto_flow": null,
1548
+ "grid_auto_rows": null,
1549
+ "grid_column": null,
1550
+ "grid_gap": null,
1551
+ "grid_row": null,
1552
+ "grid_template_areas": null,
1553
+ "grid_template_columns": null,
1554
+ "grid_template_rows": null,
1555
+ "height": null,
1556
+ "justify_content": null,
1557
+ "justify_items": null,
1558
+ "left": null,
1559
+ "margin": null,
1560
+ "max_height": null,
1561
+ "max_width": null,
1562
+ "min_height": null,
1563
+ "min_width": null,
1564
+ "object_fit": null,
1565
+ "object_position": null,
1566
+ "order": null,
1567
+ "overflow": null,
1568
+ "overflow_x": null,
1569
+ "overflow_y": null,
1570
+ "padding": null,
1571
+ "right": null,
1572
+ "top": null,
1573
+ "visibility": null,
1574
+ "width": null
1575
+ }
1576
+ },
1577
+ "cc263ef7a2c74de98950fa285bf263a6": {
1578
+ "model_module": "@jupyter-widgets/controls",
1579
+ "model_name": "DescriptionStyleModel",
1580
+ "model_module_version": "1.5.0",
1581
+ "state": {
1582
+ "_model_module": "@jupyter-widgets/controls",
1583
+ "_model_module_version": "1.5.0",
1584
+ "_model_name": "DescriptionStyleModel",
1585
+ "_view_count": null,
1586
+ "_view_module": "@jupyter-widgets/base",
1587
+ "_view_module_version": "1.2.0",
1588
+ "_view_name": "StyleView",
1589
+ "description_width": ""
1590
+ }
1591
+ },
1592
+ "a7674b9679db40cfb302809d97cc0abf": {
1593
+ "model_module": "@jupyter-widgets/base",
1594
+ "model_name": "LayoutModel",
1595
+ "model_module_version": "1.2.0",
1596
+ "state": {
1597
+ "_model_module": "@jupyter-widgets/base",
1598
+ "_model_module_version": "1.2.0",
1599
+ "_model_name": "LayoutModel",
1600
+ "_view_count": null,
1601
+ "_view_module": "@jupyter-widgets/base",
1602
+ "_view_module_version": "1.2.0",
1603
+ "_view_name": "LayoutView",
1604
+ "align_content": null,
1605
+ "align_items": null,
1606
+ "align_self": null,
1607
+ "border": null,
1608
+ "bottom": null,
1609
+ "display": null,
1610
+ "flex": null,
1611
+ "flex_flow": null,
1612
+ "grid_area": null,
1613
+ "grid_auto_columns": null,
1614
+ "grid_auto_flow": null,
1615
+ "grid_auto_rows": null,
1616
+ "grid_column": null,
1617
+ "grid_gap": null,
1618
+ "grid_row": null,
1619
+ "grid_template_areas": null,
1620
+ "grid_template_columns": null,
1621
+ "grid_template_rows": null,
1622
+ "height": null,
1623
+ "justify_content": null,
1624
+ "justify_items": null,
1625
+ "left": null,
1626
+ "margin": null,
1627
+ "max_height": null,
1628
+ "max_width": null,
1629
+ "min_height": null,
1630
+ "min_width": null,
1631
+ "object_fit": null,
1632
+ "object_position": null,
1633
+ "order": null,
1634
+ "overflow": null,
1635
+ "overflow_x": null,
1636
+ "overflow_y": null,
1637
+ "padding": null,
1638
+ "right": null,
1639
+ "top": null,
1640
+ "visibility": null,
1641
+ "width": null
1642
+ }
1643
+ },
1644
+ "4036173505fb4b528055f8ed423d7070": {
1645
+ "model_module": "@jupyter-widgets/controls",
1646
+ "model_name": "ProgressStyleModel",
1647
+ "model_module_version": "1.5.0",
1648
+ "state": {
1649
+ "_model_module": "@jupyter-widgets/controls",
1650
+ "_model_module_version": "1.5.0",
1651
+ "_model_name": "ProgressStyleModel",
1652
+ "_view_count": null,
1653
+ "_view_module": "@jupyter-widgets/base",
1654
+ "_view_module_version": "1.2.0",
1655
+ "_view_name": "StyleView",
1656
+ "bar_color": null,
1657
+ "description_width": ""
1658
+ }
1659
+ },
1660
+ "693fdd338bfc458caab25dd454508948": {
1661
+ "model_module": "@jupyter-widgets/base",
1662
+ "model_name": "LayoutModel",
1663
+ "model_module_version": "1.2.0",
1664
+ "state": {
1665
+ "_model_module": "@jupyter-widgets/base",
1666
+ "_model_module_version": "1.2.0",
1667
+ "_model_name": "LayoutModel",
1668
+ "_view_count": null,
1669
+ "_view_module": "@jupyter-widgets/base",
1670
+ "_view_module_version": "1.2.0",
1671
+ "_view_name": "LayoutView",
1672
+ "align_content": null,
1673
+ "align_items": null,
1674
+ "align_self": null,
1675
+ "border": null,
1676
+ "bottom": null,
1677
+ "display": null,
1678
+ "flex": null,
1679
+ "flex_flow": null,
1680
+ "grid_area": null,
1681
+ "grid_auto_columns": null,
1682
+ "grid_auto_flow": null,
1683
+ "grid_auto_rows": null,
1684
+ "grid_column": null,
1685
+ "grid_gap": null,
1686
+ "grid_row": null,
1687
+ "grid_template_areas": null,
1688
+ "grid_template_columns": null,
1689
+ "grid_template_rows": null,
1690
+ "height": null,
1691
+ "justify_content": null,
1692
+ "justify_items": null,
1693
+ "left": null,
1694
+ "margin": null,
1695
+ "max_height": null,
1696
+ "max_width": null,
1697
+ "min_height": null,
1698
+ "min_width": null,
1699
+ "object_fit": null,
1700
+ "object_position": null,
1701
+ "order": null,
1702
+ "overflow": null,
1703
+ "overflow_x": null,
1704
+ "overflow_y": null,
1705
+ "padding": null,
1706
+ "right": null,
1707
+ "top": null,
1708
+ "visibility": null,
1709
+ "width": null
1710
+ }
1711
+ },
1712
+ "b90ea0357c6149a196eeb7a4d490fb9d": {
1713
  "model_module": "@jupyter-widgets/controls",
1714
  "model_name": "DescriptionStyleModel",
1715
  "model_module_version": "1.5.0",
 
2005
  "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
2006
  ],
2007
  "metadata": {
2008
+ "id": "iWeFnT1gAx6A",
2009
+ "outputId": "c3ecb406-4a9b-4e00-90a0-467cab40aca1",
2010
+ "colab": {
2011
+ "base_uri": "https://localhost:8080/",
2012
+ "height": 1000,
2013
+ "referenced_widgets": [
2014
+ "a9ef4e8a75744de8aa5415b1db4eea10",
2015
+ "7b8d395589344f3f924f9e155c42f2e2",
2016
+ "3e86dfa658bb4c158e39acd9d0621cf7",
2017
+ "ed3598441ca645c8b288ad212540e7a6",
2018
+ "837d9220ce274f99bcdcd96696d005f2",
2019
+ "f74d31f204e54bf486d54fd0f1836c6d",
2020
+ "b541cd8491ea4ef3b5e898a708e8c7c4",
2021
+ "eeab01019d2e4c15997a89ac6b63e0e3",
2022
+ "d9a9de3142c647d7817de26ae1b494bc",
2023
+ "ce3af20e0ed04b94a272037a0505b628",
2024
+ "a38f43c0676c470aa92085295c86a2f5",
2025
+ "5c3ba6ab7d454729bfc51fe4b424016b",
2026
+ "35c1ab379ab84803801ecb8f0ca02cc2",
2027
+ "36d4abf6f1c6490089d11f08aafffbd3",
2028
+ "4b46fd734f8c4d2da3578c8ed9ae65f9",
2029
+ "15ae48732fa24f4992f878a8bdbd270c",
2030
+ "ed5d90f26adf4cbbbc55b4ae9bcb5dfd",
2031
+ "753d4170097343619be3bc9f8a1db292",
2032
+ "b163e9677bcc49179d9a39dd7ba7b15c",
2033
+ "6d8b715d7fac49358e852dfd356d6bfa",
2034
+ "d6456001eee242838e9543d99632af20",
2035
+ "739b6630e62e4aae9666d20070d4094f",
2036
+ "a8b80d53ae86498b83f1d95eb73ed79b",
2037
+ "45a31ad2067447c4a1f692462a1a6328",
2038
+ "84f0d7a8123b4ff6bff07ddee9597cb7",
2039
+ "fb79ccb21a3a48a29998a3ad9b453473",
2040
+ "c5e1aacbaada46419a3b080d9fb7dcbc",
2041
+ "8136a9cbf6224f56b7313d9b1f0ee24a",
2042
+ "2392b9a8970f4e38b29a791a72330f2f",
2043
+ "9eb6f1d3a5f5403dbce0e1ad7be66c84",
2044
+ "f0b0e5463ef5431dafe2407134700890",
2045
+ "e0596c2620a84746bdfbd7b1970c3fec",
2046
+ "5fe581987ed040d6b0edb8cfa35c83d3",
2047
+ "d713cb1329124a55829e196919bbeebe",
2048
+ "2834ce621fc64c14a025f49a5a5c0156",
2049
+ "670e6e2290644c9dac59ee5bfff368cf",
2050
+ "a1e23afe9a624b5b8abc5fe0cf576e41",
2051
+ "02e4857a5f674b6fbf528c5ebf0c75d8",
2052
+ "74b454dff7b94965b43c5d3fb166a784",
2053
+ "35c1bd03e909414ba339f584ad1d632c",
2054
+ "e7ba6190c8564c86a1168567cceddfd7",
2055
+ "fc1849474a4348579edc9699470a4a10",
2056
+ "4a35817c51e24b399d8014cc9e66e723",
2057
+ "cbf1975e54c544ba93ef76f7d0430551",
2058
+ "71024becb22f4758be2c790063347ac9",
2059
+ "637917dff73945c0820c5a71400c4145",
2060
+ "31b1d3391e4742d6a822796df9d874f7",
2061
+ "b048d1da65524b0d80ad445b36dd4ee2",
2062
+ "20122c9d0c554702a557ab0b039cca77",
2063
+ "5a4756d40ce74a6a8efa2a50b1942755",
2064
+ "cc263ef7a2c74de98950fa285bf263a6",
2065
+ "a7674b9679db40cfb302809d97cc0abf",
2066
+ "4036173505fb4b528055f8ed423d7070",
2067
+ "693fdd338bfc458caab25dd454508948",
2068
+ "b90ea0357c6149a196eeb7a4d490fb9d"
2069
+ ]
2070
+ }
2071
  },
2072
+ "execution_count": 2,
2073
+ "outputs": [
2074
+ {
2075
+ "output_type": "display_data",
2076
+ "data": {
2077
+ "text/plain": [
2078
+ "tokenizer_config.json: 0%| | 0.00/905 [00:00<?, ?B/s]"
2079
+ ],
2080
+ "application/vnd.jupyter.widget-view+json": {
2081
+ "version_major": 2,
2082
+ "version_minor": 0,
2083
+ "model_id": "a9ef4e8a75744de8aa5415b1db4eea10"
2084
+ }
2085
+ },
2086
+ "metadata": {}
2087
+ },
2088
+ {
2089
+ "output_type": "display_data",
2090
+ "data": {
2091
+ "text/plain": [
2092
+ "vocab.json: 0%| | 0.00/961k [00:00<?, ?B/s]"
2093
+ ],
2094
+ "application/vnd.jupyter.widget-view+json": {
2095
+ "version_major": 2,
2096
+ "version_minor": 0,
2097
+ "model_id": "5c3ba6ab7d454729bfc51fe4b424016b"
2098
+ }
2099
+ },
2100
+ "metadata": {}
2101
+ },
2102
+ {
2103
+ "output_type": "display_data",
2104
+ "data": {
2105
+ "text/plain": [
2106
+ "merges.txt: 0%| | 0.00/525k [00:00<?, ?B/s]"
2107
+ ],
2108
+ "application/vnd.jupyter.widget-view+json": {
2109
+ "version_major": 2,
2110
+ "version_minor": 0,
2111
+ "model_id": "a8b80d53ae86498b83f1d95eb73ed79b"
2112
+ }
2113
+ },
2114
+ "metadata": {}
2115
+ },
2116
+ {
2117
+ "output_type": "display_data",
2118
+ "data": {
2119
+ "text/plain": [
2120
+ "tokenizer.json: 0%| | 0.00/2.22M [00:00<?, ?B/s]"
2121
+ ],
2122
+ "application/vnd.jupyter.widget-view+json": {
2123
+ "version_major": 2,
2124
+ "version_minor": 0,
2125
+ "model_id": "d713cb1329124a55829e196919bbeebe"
2126
+ }
2127
+ },
2128
+ "metadata": {}
2129
+ },
2130
+ {
2131
+ "output_type": "display_data",
2132
+ "data": {
2133
+ "text/plain": [
2134
+ "special_tokens_map.json: 0%| | 0.00/389 [00:00<?, ?B/s]"
2135
+ ],
2136
+ "application/vnd.jupyter.widget-view+json": {
2137
+ "version_major": 2,
2138
+ "version_minor": 0,
2139
+ "model_id": "71024becb22f4758be2c790063347ac9"
2140
+ }
2141
+ },
2142
+ "metadata": {}
2143
+ },
2144
+ {
2145
+ "output_type": "stream",
2146
+ "name": "stderr",
2147
+ "text": [
2148
+ "<ipython-input-2-f767de8bdd18>:22: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
2149
+ " A = torch.tensor(token[id_A])\n",
2150
+ "<ipython-input-2-f767de8bdd18>:28: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
2151
+ " C = torch.tensor(token[id_C])\n",
2152
+ "<ipython-input-2-f767de8bdd18>:84: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
2153
+ " B = torch.tensor(token[id_B])\n"
2154
+ ]
2155
+ },
2156
+ {
2157
+ "output_type": "stream",
2158
+ "name": "stdout",
2159
+ "text": [
2160
+ "[49406, 49407]\n",
2161
+ "Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\n",
2162
+ "The similarity between A 'banana ' and C 'token C of random type' is -1.97 %\n",
2163
+ "No operation\n",
2164
+ "Calculated all cosine-similarities between the token banana with Id_A = 8922 with the the rest of the 49407 tokens as a 1x49407 tensor\n",
2165
+ "//---//\n",
2166
+ "\n",
2167
+ "Here is the result : \n",
2168
+ "\n",
2169
+ "banana \n",
2170
+ "similiarity = 100.0 %\n",
2171
+ "--------\n",
2172
+ "bananas \n",
2173
+ "similiarity = 38.93 %\n",
2174
+ "--------\n",
2175
+ "banan-\n",
2176
+ "similiarity = 30.8 %\n",
2177
+ "--------\n",
2178
+ " \n",
2179
+ "similiarity = 27.12 %\n",
2180
+ "--------\n",
2181
+ "pineapple \n",
2182
+ "similiarity = 19.7 %\n",
2183
+ "--------\n",
2184
+ "chicken \n",
2185
+ "similiarity = 19.24 %\n",
2186
+ "--------\n",
2187
+ "potassium \n",
2188
+ "similiarity = 19.21 %\n",
2189
+ "--------\n",
2190
+ "sausage \n",
2191
+ "similiarity = 19.07 %\n",
2192
+ "--------\n",
2193
+ "lemon \n",
2194
+ "similiarity = 18.82 %\n",
2195
+ "--------\n",
2196
+ "orange \n",
2197
+ "similiarity = 18.42 %\n",
2198
+ "--------\n",
2199
+ "peanut \n",
2200
+ "similiarity = 17.84 %\n",
2201
+ "--------\n",
2202
+ "parachute \n",
2203
+ "similiarity = 17.19 %\n",
2204
+ "--------\n",
2205
+ "duck-\n",
2206
+ "similiarity = 16.8 %\n",
2207
+ "--------\n",
2208
+ "yellow \n",
2209
+ "similiarity = 16.21 %\n",
2210
+ "--------\n",
2211
+ "grape \n",
2212
+ "similiarity = 16.19 %\n",
2213
+ "--------\n",
2214
+ "kangaroo \n",
2215
+ "similiarity = 16.13 %\n",
2216
+ "--------\n",
2217
+ "apple \n",
2218
+ "similiarity = 16.13 %\n",
2219
+ "--------\n",
2220
+ "tangerine \n",
2221
+ "similiarity = 16.08 %\n",
2222
+ "--------\n",
2223
+ "giraffe \n",
2224
+ "similiarity = 16.04 %\n",
2225
+ "--------\n",
2226
+ "mango \n",
2227
+ "similiarity = 16.03 %\n",
2228
+ "--------\n",
2229
+ "rubber \n",
2230
+ "similiarity = 15.95 %\n",
2231
+ "--------\n",
2232
+ "bamboo \n",
2233
+ "similiarity = 15.88 %\n",
2234
+ "--------\n",
2235
+ "umbrella \n",
2236
+ "similiarity = 15.82 %\n",
2237
+ "--------\n",
2238
+ "nutella \n",
2239
+ "similiarity = 15.69 %\n",
2240
+ "--------\n",
2241
+ "ferrari \n",
2242
+ "similiarity = 15.69 %\n",
2243
+ "--------\n",
2244
+ "oranges \n",
2245
+ "similiarity = 15.65 %\n",
2246
+ "--------\n",
2247
+ "peanuts \n",
2248
+ "similiarity = 15.62 %\n",
2249
+ "--------\n",
2250
+ "ali \n",
2251
+ "similiarity = 15.49 %\n",
2252
+ "--------\n",
2253
+ "cucumber \n",
2254
+ "similiarity = 15.32 %\n",
2255
+ "--------\n",
2256
+ "potato \n",
2257
+ "similiarity = 15.22 %\n",
2258
+ "--------\n",
2259
+ "monkey \n",
2260
+ "similiarity = 15.2 %\n",
2261
+ "--------\n",
2262
+ "croissant \n",
2263
+ "similiarity = 15.18 %\n",
2264
+ "--------\n",
2265
+ "papaya \n",
2266
+ "similiarity = 15.17 %\n",
2267
+ "--------\n",
2268
+ "christmas \n",
2269
+ "similiarity = 15.12 %\n",
2270
+ "--------\n",
2271
+ "sandwich \n",
2272
+ "similiarity = 15.0 %\n",
2273
+ "--------\n",
2274
+ "rainbow \n",
2275
+ "similiarity = 14.98 %\n",
2276
+ "--------\n",
2277
+ "tomato \n",
2278
+ "similiarity = 14.96 %\n",
2279
+ "--------\n",
2280
+ "martini \n",
2281
+ "similiarity = 14.93 %\n",
2282
+ "--------\n",
2283
+ "cabaret \n",
2284
+ "similiarity = 14.83 %\n",
2285
+ "--------\n",
2286
+ "ginger \n",
2287
+ "similiarity = 14.82 %\n",
2288
+ "--------\n",
2289
+ "animal \n",
2290
+ "similiarity = 14.76 %\n",
2291
+ "--------\n",
2292
+ "vanilla \n",
2293
+ "similiarity = 14.73 %\n",
2294
+ "--------\n",
2295
+ "mustache \n",
2296
+ "similiarity = 14.64 %\n",
2297
+ "--------\n",
2298
+ "lime \n",
2299
+ "similiarity = 14.62 %\n",
2300
+ "--------\n",
2301
+ "sickle \n",
2302
+ "similiarity = 14.6 %\n",
2303
+ "--------\n",
2304
+ "vista \n",
2305
+ "similiarity = 14.53 %\n",
2306
+ "--------\n",
2307
+ "coconut \n",
2308
+ "similiarity = 14.52 %\n",
2309
+ "--------\n",
2310
+ "kara \n",
2311
+ "similiarity = 14.46 %\n",
2312
+ "--------\n",
2313
+ "alligator \n",
2314
+ "similiarity = 14.39 %\n",
2315
+ "--------\n",
2316
+ "blueberry \n",
2317
+ "similiarity = 14.34 %\n",
2318
+ "--------\n",
2319
+ "squirrel \n",
2320
+ "similiarity = 14.29 %\n",
2321
+ "--------\n",
2322
+ "atore \n",
2323
+ "similiarity = 14.19 %\n",
2324
+ "--------\n",
2325
+ "watermelon \n",
2326
+ "similiarity = 14.13 %\n",
2327
+ "--------\n",
2328
+ "nana \n",
2329
+ "similiarity = 14.09 %\n",
2330
+ "--------\n",
2331
+ "latex \n",
2332
+ "similiarity = 14.08 %\n",
2333
+ "--------\n",
2334
+ "agricultural \n",
2335
+ "similiarity = 14.02 %\n",
2336
+ "--------\n",
2337
+ "zucchini \n",
2338
+ "similiarity = 14.0 %\n",
2339
+ "--------\n",
2340
+ "saxophone \n",
2341
+ "similiarity = 13.93 %\n",
2342
+ "--------\n",
2343
+ "mozzarella \n",
2344
+ "similiarity = 13.91 %\n",
2345
+ "--------\n",
2346
+ "eggplant \n",
2347
+ "similiarity = 13.9 %\n",
2348
+ "--------\n",
2349
+ "pickle \n",
2350
+ "similiarity = 13.89 %\n",
2351
+ "--------\n",
2352
+ "tortilla \n",
2353
+ "similiarity = 13.88 %\n",
2354
+ "--------\n",
2355
+ "maniac \n",
2356
+ "similiarity = 13.84 %\n",
2357
+ "--------\n",
2358
+ "milk \n",
2359
+ "similiarity = 13.83 %\n",
2360
+ "--------\n",
2361
+ "cellphone \n",
2362
+ "similiarity = 13.78 %\n",
2363
+ "--------\n",
2364
+ "duck \n",
2365
+ "similiarity = 13.73 %\n",
2366
+ "--------\n",
2367
+ "umbrel-\n",
2368
+ "similiarity = 13.71 %\n",
2369
+ "--------\n",
2370
+ "fanny \n",
2371
+ "similiarity = 13.69 %\n",
2372
+ "--------\n",
2373
+ "twister \n",
2374
+ "similiarity = 13.67 %\n",
2375
+ "--------\n",
2376
+ "moustache \n",
2377
+ "similiarity = 13.66 %\n",
2378
+ "--------\n",
2379
+ "manafort \n",
2380
+ "similiarity = 13.66 %\n",
2381
+ "--------\n",
2382
+ "grapefruit \n",
2383
+ "similiarity = 13.6 %\n",
2384
+ "--------\n",
2385
+ "broom \n",
2386
+ "similiarity = 13.59 %\n",
2387
+ "--------\n",
2388
+ "scorpion \n",
2389
+ "similiarity = 13.59 %\n",
2390
+ "--------\n",
2391
+ "fruit-\n",
2392
+ "similiarity = 13.57 %\n",
2393
+ "--------\n",
2394
+ "agan-\n",
2395
+ "similiarity = 13.53 %\n",
2396
+ "--------\n",
2397
+ "sunflower \n",
2398
+ "similiarity = 13.49 %\n",
2399
+ "--------\n",
2400
+ "banc-\n",
2401
+ "similiarity = 13.46 %\n",
2402
+ "--------\n",
2403
+ "literature \n",
2404
+ "similiarity = 13.45 %\n",
2405
+ "--------\n",
2406
+ "pelican \n",
2407
+ "similiarity = 13.43 %\n",
2408
+ "--------\n",
2409
+ "breakfast \n",
2410
+ "similiarity = 13.42 %\n",
2411
+ "--------\n",
2412
+ "pear \n",
2413
+ "similiarity = 13.42 %\n",
2414
+ "--------\n",
2415
+ "orange-\n",
2416
+ "similiarity = 13.4 %\n",
2417
+ "--------\n",
2418
+ "monet \n",
2419
+ "similiarity = 13.4 %\n",
2420
+ "--------\n",
2421
+ "snake \n",
2422
+ "similiarity = 13.32 %\n",
2423
+ "--------\n",
2424
+ "vampire \n",
2425
+ "similiarity = 13.32 %\n",
2426
+ "--------\n",
2427
+ "cinnamon \n",
2428
+ "similiarity = 13.3 %\n",
2429
+ "--------\n",
2430
+ "strawberries \n",
2431
+ "similiarity = 13.29 %\n",
2432
+ "--------\n",
2433
+ "butternut \n",
2434
+ "similiarity = 13.22 %\n",
2435
+ "--------\n",
2436
+ "sausages \n",
2437
+ "similiarity = 13.22 %\n",
2438
+ "--------\n",
2439
+ "iphone \n",
2440
+ "similiarity = 13.21 %\n",
2441
+ "--------\n",
2442
+ "egg-\n",
2443
+ "similiarity = 13.2 %\n",
2444
+ "--------\n",
2445
+ "capu-\n",
2446
+ "similiarity = 13.2 %\n",
2447
+ "--------\n",
2448
+ "mannequin \n",
2449
+ "similiarity = 13.19 %\n",
2450
+ "--------\n",
2451
+ "cucumbers \n",
2452
+ "similiarity = 13.16 %\n",
2453
+ "--------\n",
2454
+ "champagne \n",
2455
+ "similiarity = 13.15 %\n",
2456
+ "--------\n",
2457
+ "triangle \n",
2458
+ "similiarity = 13.14 %\n",
2459
+ "--------\n",
2460
+ "apples \n",
2461
+ "similiarity = 13.09 %\n",
2462
+ "--------\n",
2463
+ "dynamite \n",
2464
+ "similiarity = 13.08 %\n",
2465
+ "--------\n",
2466
+ "chocolate \n",
2467
+ "similiarity = 13.08 %\n",
2468
+ "--------\n"
2469
+ ]
2470
+ }
2471
+ ]
2472
  },
2473
  {
2474
  "cell_type": "markdown",
 
2529
  ],
2530
  "metadata": {
2531
  "id": "ke6mZ1RZDOeB",
2532
+ "outputId": "8f8c9d3f-cbda-4d9a-d126-c7f9311a74ee",
2533
  "colab": {
2534
  "base_uri": "https://localhost:8080/",
2535
  "height": 1000
 
2557
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
2558
  "from transformers import CLIPProcessor, CLIPModel\n",
2559
  "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
2560
+ "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
2561
  "\n",
2562
  "# Get image features\n",
2563
  "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
 
2594
  "d.close() #close the file"
2595
  ],
2596
  "metadata": {
2597
+ "id": "gaOB8rsOneIa"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2598
  },
2599
+ "execution_count": null,
2600
+ "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2601
  },
2602
  {
2603
  "cell_type": "code",
 
2628
  ],
2629
  "metadata": {
2630
  "id": "eZqMUhP0qYaK",
2631
+ "outputId": "4801cded-e73c-4c0b-eb6e-608ed899ff49",
2632
  "colab": {
2633
  "base_uri": "https://localhost:8080/"
2634
  }