codeShare committed on
Commit
e754fde
·
verified ·
1 Parent(s): 38a8e51

Upload sd_token_similarity_calculator.ipynb

Browse files
Files changed (1) hide show
  1. sd_token_similarity_calculator.ipynb +20 -398
sd_token_similarity_calculator.ipynb CHANGED
@@ -83,28 +83,10 @@
83
  ],
84
  "metadata": {
85
  "id": "Ch9puvwKH1s3",
86
- "collapsed": true,
87
- "colab": {
88
- "base_uri": "https://localhost:8080/"
89
- },
90
- "outputId": "982a9210-a3fd-4d90-bef7-5aa6f5864797"
91
  },
92
- "execution_count": 2,
93
- "outputs": [
94
- {
95
- "output_type": "stream",
96
- "name": "stdout",
97
- "text": [
98
- "Cloning into 'sd_tokens'...\n",
99
- "remote: Enumerating objects: 10, done.\u001b[K\n",
100
- "remote: Counting objects: 100% (7/7), done.\u001b[K\n",
101
- "remote: Compressing objects: 100% (7/7), done.\u001b[K\n",
102
- "remote: Total 10 (delta 1), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
103
- "Unpacking objects: 100% (10/10), 306.93 KiB | 4.72 MiB/s, done.\n",
104
- "/content/sd_tokens\n"
105
- ]
106
- }
107
- ]
108
  },
109
  {
110
  "cell_type": "code",
@@ -131,7 +113,7 @@
131
  },
132
  "outputId": "86f2f01e-6a04-4292-cee7-70fd8398e07f"
133
  },
134
- "execution_count": 3,
135
  "outputs": [
136
  {
137
  "output_type": "stream",
@@ -167,7 +149,7 @@
167
  "metadata": {
168
  "id": "YqdiF8DIz9Wu"
169
  },
170
- "execution_count": 4,
171
  "outputs": []
172
  },
173
  {
@@ -221,23 +203,10 @@
221
  ],
222
  "metadata": {
223
  "id": "oXbNSRSKPgRr",
224
- "collapsed": true,
225
- "colab": {
226
- "base_uri": "https://localhost:8080/"
227
- },
228
- "outputId": "76f8ec94-d29c-46d9-893b-49875f3a1949"
229
  },
230
- "execution_count": 5,
231
- "outputs": [
232
- {
233
- "output_type": "stream",
234
- "name": "stdout",
235
- "text": [
236
- "Tokenized prompt 'mix_with' tensor C is a random valued tensor with no ID\n",
237
- "No operation\n"
238
- ]
239
- }
240
- ]
241
  },
242
  {
243
  "cell_type": "code",
@@ -276,22 +245,10 @@
276
  ],
277
  "metadata": {
278
  "id": "juxsvco9B0iV",
279
- "collapsed": true,
280
- "colab": {
281
- "base_uri": "https://localhost:8080/"
282
- },
283
- "outputId": "dc893bbf-e9cb-425c-95b8-ffafd3ab2fbc"
284
  },
285
- "execution_count": 6,
286
- "outputs": [
287
- {
288
- "output_type": "stream",
289
- "name": "stdout",
290
- "text": [
291
- "Calculated all cosine-similarities between the token banana</w> with Id_A = 8922 with the the rest of the 49407 tokens as a 1x49407 tensor\n"
292
- ]
293
- }
294
- ]
295
  },
296
  {
297
  "cell_type": "markdown",
@@ -325,321 +282,10 @@
325
  ],
326
  "metadata": {
327
  "id": "YIEmLAzbHeuo",
328
- "collapsed": true,
329
- "colab": {
330
- "base_uri": "https://localhost:8080/"
331
- },
332
- "outputId": "4a2fa70f-16ff-4bba-fb01-d39ad697d4ff"
333
  },
334
- "execution_count": 7,
335
- "outputs": [
336
- {
337
- "output_type": "stream",
338
- "name": "stdout",
339
- "text": [
340
- "banana</w>\n",
341
- "similiarity = 100.0 %\n",
342
- "--------\n",
343
- "bananas</w>\n",
344
- "similiarity = 38.93 %\n",
345
- "--------\n",
346
- "banan\n",
347
- "similiarity = 30.8 %\n",
348
- "--------\n",
349
- "ðŁįĮ</w>\n",
350
- "similiarity = 27.12 %\n",
351
- "--------\n",
352
- "pineapple</w>\n",
353
- "similiarity = 19.7 %\n",
354
- "--------\n",
355
- "chicken</w>\n",
356
- "similiarity = 19.24 %\n",
357
- "--------\n",
358
- "potassium</w>\n",
359
- "similiarity = 19.21 %\n",
360
- "--------\n",
361
- "sausage</w>\n",
362
- "similiarity = 19.07 %\n",
363
- "--------\n",
364
- "lemon</w>\n",
365
- "similiarity = 18.82 %\n",
366
- "--------\n",
367
- "orange</w>\n",
368
- "similiarity = 18.42 %\n",
369
- "--------\n",
370
- "peanut</w>\n",
371
- "similiarity = 17.84 %\n",
372
- "--------\n",
373
- "parachute</w>\n",
374
- "similiarity = 17.19 %\n",
375
- "--------\n",
376
- "duck\n",
377
- "similiarity = 16.8 %\n",
378
- "--------\n",
379
- "yellow</w>\n",
380
- "similiarity = 16.21 %\n",
381
- "--------\n",
382
- "grape</w>\n",
383
- "similiarity = 16.19 %\n",
384
- "--------\n",
385
- "kangaroo</w>\n",
386
- "similiarity = 16.13 %\n",
387
- "--------\n",
388
- "apple</w>\n",
389
- "similiarity = 16.13 %\n",
390
- "--------\n",
391
- "tangerine</w>\n",
392
- "similiarity = 16.08 %\n",
393
- "--------\n",
394
- "giraffe</w>\n",
395
- "similiarity = 16.04 %\n",
396
- "--------\n",
397
- "mango</w>\n",
398
- "similiarity = 16.03 %\n",
399
- "--------\n",
400
- "rubber</w>\n",
401
- "similiarity = 15.95 %\n",
402
- "--------\n",
403
- "bamboo</w>\n",
404
- "similiarity = 15.88 %\n",
405
- "--------\n",
406
- "umbrella</w>\n",
407
- "similiarity = 15.82 %\n",
408
- "--------\n",
409
- "nutella</w>\n",
410
- "similiarity = 15.69 %\n",
411
- "--------\n",
412
- "ferrari</w>\n",
413
- "similiarity = 15.69 %\n",
414
- "--------\n",
415
- "oranges</w>\n",
416
- "similiarity = 15.65 %\n",
417
- "--------\n",
418
- "peanuts</w>\n",
419
- "similiarity = 15.62 %\n",
420
- "--------\n",
421
- "ali</w>\n",
422
- "similiarity = 15.49 %\n",
423
- "--------\n",
424
- "cucumber</w>\n",
425
- "similiarity = 15.32 %\n",
426
- "--------\n",
427
- "potato</w>\n",
428
- "similiarity = 15.22 %\n",
429
- "--------\n",
430
- "monkey</w>\n",
431
- "similiarity = 15.2 %\n",
432
- "--------\n",
433
- "croissant</w>\n",
434
- "similiarity = 15.18 %\n",
435
- "--------\n",
436
- "papaya</w>\n",
437
- "similiarity = 15.17 %\n",
438
- "--------\n",
439
- "christmas</w>\n",
440
- "similiarity = 15.12 %\n",
441
- "--------\n",
442
- "sandwich</w>\n",
443
- "similiarity = 15.0 %\n",
444
- "--------\n",
445
- "rainbow</w>\n",
446
- "similiarity = 14.98 %\n",
447
- "--------\n",
448
- "tomato</w>\n",
449
- "similiarity = 14.96 %\n",
450
- "--------\n",
451
- "martini</w>\n",
452
- "similiarity = 14.93 %\n",
453
- "--------\n",
454
- "cabaret</w>\n",
455
- "similiarity = 14.83 %\n",
456
- "--------\n",
457
- "ginger</w>\n",
458
- "similiarity = 14.82 %\n",
459
- "--------\n",
460
- "animal</w>\n",
461
- "similiarity = 14.76 %\n",
462
- "--------\n",
463
- "vanilla</w>\n",
464
- "similiarity = 14.73 %\n",
465
- "--------\n",
466
- "mustache</w>\n",
467
- "similiarity = 14.64 %\n",
468
- "--------\n",
469
- "lime</w>\n",
470
- "similiarity = 14.62 %\n",
471
- "--------\n",
472
- "sickle</w>\n",
473
- "similiarity = 14.6 %\n",
474
- "--------\n",
475
- "vista</w>\n",
476
- "similiarity = 14.53 %\n",
477
- "--------\n",
478
- "coconut</w>\n",
479
- "similiarity = 14.52 %\n",
480
- "--------\n",
481
- "kara</w>\n",
482
- "similiarity = 14.46 %\n",
483
- "--------\n",
484
- "alligator</w>\n",
485
- "similiarity = 14.39 %\n",
486
- "--------\n",
487
- "blueberry</w>\n",
488
- "similiarity = 14.34 %\n",
489
- "--------\n",
490
- "squirrel</w>\n",
491
- "similiarity = 14.29 %\n",
492
- "--------\n",
493
- "atore</w>\n",
494
- "similiarity = 14.19 %\n",
495
- "--------\n",
496
- "watermelon</w>\n",
497
- "similiarity = 14.13 %\n",
498
- "--------\n",
499
- "nana</w>\n",
500
- "similiarity = 14.09 %\n",
501
- "--------\n",
502
- "latex</w>\n",
503
- "similiarity = 14.08 %\n",
504
- "--------\n",
505
- "agricultural</w>\n",
506
- "similiarity = 14.02 %\n",
507
- "--------\n",
508
- "zucchini</w>\n",
509
- "similiarity = 14.0 %\n",
510
- "--------\n",
511
- "saxophone</w>\n",
512
- "similiarity = 13.93 %\n",
513
- "--------\n",
514
- "mozzarella</w>\n",
515
- "similiarity = 13.91 %\n",
516
- "--------\n",
517
- "eggplant</w>\n",
518
- "similiarity = 13.9 %\n",
519
- "--------\n",
520
- "pickle</w>\n",
521
- "similiarity = 13.89 %\n",
522
- "--------\n",
523
- "tortilla</w>\n",
524
- "similiarity = 13.88 %\n",
525
- "--------\n",
526
- "maniac</w>\n",
527
- "similiarity = 13.84 %\n",
528
- "--------\n",
529
- "milk</w>\n",
530
- "similiarity = 13.83 %\n",
531
- "--------\n",
532
- "cellphone</w>\n",
533
- "similiarity = 13.78 %\n",
534
- "--------\n",
535
- "duck</w>\n",
536
- "similiarity = 13.73 %\n",
537
- "--------\n",
538
- "umbrel\n",
539
- "similiarity = 13.71 %\n",
540
- "--------\n",
541
- "fanny</w>\n",
542
- "similiarity = 13.69 %\n",
543
- "--------\n",
544
- "twister</w>\n",
545
- "similiarity = 13.67 %\n",
546
- "--------\n",
547
- "moustache</w>\n",
548
- "similiarity = 13.66 %\n",
549
- "--------\n",
550
- "manafort</w>\n",
551
- "similiarity = 13.66 %\n",
552
- "--------\n",
553
- "grapefruit</w>\n",
554
- "similiarity = 13.6 %\n",
555
- "--------\n",
556
- "broom</w>\n",
557
- "similiarity = 13.59 %\n",
558
- "--------\n",
559
- "scorpion</w>\n",
560
- "similiarity = 13.59 %\n",
561
- "--------\n",
562
- "fruit\n",
563
- "similiarity = 13.57 %\n",
564
- "--------\n",
565
- "agan\n",
566
- "similiarity = 13.53 %\n",
567
- "--------\n",
568
- "sunflower</w>\n",
569
- "similiarity = 13.49 %\n",
570
- "--------\n",
571
- "banc\n",
572
- "similiarity = 13.46 %\n",
573
- "--------\n",
574
- "literature</w>\n",
575
- "similiarity = 13.45 %\n",
576
- "--------\n",
577
- "pelican</w>\n",
578
- "similiarity = 13.43 %\n",
579
- "--------\n",
580
- "breakfast</w>\n",
581
- "similiarity = 13.42 %\n",
582
- "--------\n",
583
- "pear</w>\n",
584
- "similiarity = 13.42 %\n",
585
- "--------\n",
586
- "orange\n",
587
- "similiarity = 13.4 %\n",
588
- "--------\n",
589
- "monet</w>\n",
590
- "similiarity = 13.4 %\n",
591
- "--------\n",
592
- "snake</w>\n",
593
- "similiarity = 13.32 %\n",
594
- "--------\n",
595
- "vampire</w>\n",
596
- "similiarity = 13.32 %\n",
597
- "--------\n",
598
- "cinnamon</w>\n",
599
- "similiarity = 13.3 %\n",
600
- "--------\n",
601
- "strawberries</w>\n",
602
- "similiarity = 13.29 %\n",
603
- "--------\n",
604
- "butternut</w>\n",
605
- "similiarity = 13.22 %\n",
606
- "--------\n",
607
- "sausages</w>\n",
608
- "similiarity = 13.22 %\n",
609
- "--------\n",
610
- "iphone</w>\n",
611
- "similiarity = 13.21 %\n",
612
- "--------\n",
613
- "egg\n",
614
- "similiarity = 13.2 %\n",
615
- "--------\n",
616
- "capu\n",
617
- "similiarity = 13.2 %\n",
618
- "--------\n",
619
- "mannequin</w>\n",
620
- "similiarity = 13.19 %\n",
621
- "--------\n",
622
- "cucumbers</w>\n",
623
- "similiarity = 13.16 %\n",
624
- "--------\n",
625
- "champagne</w>\n",
626
- "similiarity = 13.15 %\n",
627
- "--------\n",
628
- "triangle</w>\n",
629
- "similiarity = 13.14 %\n",
630
- "--------\n",
631
- "apples</w>\n",
632
- "similiarity = 13.09 %\n",
633
- "--------\n",
634
- "dynamite</w>\n",
635
- "similiarity = 13.08 %\n",
636
- "--------\n",
637
- "chocolate</w>\n",
638
- "similiarity = 13.08 %\n",
639
- "--------\n"
640
- ]
641
- }
642
- ]
643
  },
644
  {
645
  "cell_type": "code",
@@ -657,22 +303,10 @@
657
  ],
658
  "metadata": {
659
  "id": "MwmOdC9cNZty",
660
- "collapsed": true,
661
- "colab": {
662
- "base_uri": "https://localhost:8080/"
663
- },
664
- "outputId": "0dd984d0-e253-4981-d72f-40aa83d57d8b"
665
  },
666
- "execution_count": 8,
667
- "outputs": [
668
- {
669
- "output_type": "stream",
670
- "name": "stdout",
671
- "text": [
672
- "The similarity between tokens A and B is 3.671 %\n"
673
- ]
674
- }
675
- ]
676
  },
677
  {
678
  "cell_type": "code",
@@ -709,22 +343,10 @@
709
  ],
710
  "metadata": {
711
  "id": "QQOjh5BvnG8M",
712
- "collapsed": true,
713
- "colab": {
714
- "base_uri": "https://localhost:8080/"
715
- },
716
- "outputId": "8bd6eb94-c5a7-47e6-913b-346941b144a6"
717
  },
718
- "execution_count": 11,
719
- "outputs": [
720
- {
721
- "output_type": "stream",
722
- "name": "stdout",
723
- "text": [
724
- "The similarity between the text_encoding for A:\"one ripe banana\" and B: \"a long yellow fruit\" is 83.696 %\n"
725
- ]
726
- }
727
- ]
728
  },
729
  {
730
  "cell_type": "markdown",
 
83
  ],
84
  "metadata": {
85
  "id": "Ch9puvwKH1s3",
86
+ "collapsed": true
 
 
 
 
87
  },
88
+ "execution_count": null,
89
+ "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  },
91
  {
92
  "cell_type": "code",
 
113
  },
114
  "outputId": "86f2f01e-6a04-4292-cee7-70fd8398e07f"
115
  },
116
+ "execution_count": null,
117
  "outputs": [
118
  {
119
  "output_type": "stream",
 
149
  "metadata": {
150
  "id": "YqdiF8DIz9Wu"
151
  },
152
+ "execution_count": null,
153
  "outputs": []
154
  },
155
  {
 
203
  ],
204
  "metadata": {
205
  "id": "oXbNSRSKPgRr",
206
+ "collapsed": true
 
 
 
 
207
  },
208
+ "execution_count": null,
209
+ "outputs": []
 
 
 
 
 
 
 
 
 
210
  },
211
  {
212
  "cell_type": "code",
 
245
  ],
246
  "metadata": {
247
  "id": "juxsvco9B0iV",
248
+ "collapsed": true
 
 
 
 
249
  },
250
+ "execution_count": null,
251
+ "outputs": []
 
 
 
 
 
 
 
 
252
  },
253
  {
254
  "cell_type": "markdown",
 
282
  ],
283
  "metadata": {
284
  "id": "YIEmLAzbHeuo",
285
+ "collapsed": true
 
 
 
 
286
  },
287
+ "execution_count": null,
288
+ "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  },
290
  {
291
  "cell_type": "code",
 
303
  ],
304
  "metadata": {
305
  "id": "MwmOdC9cNZty",
306
+ "collapsed": true
 
 
 
 
307
  },
308
+ "execution_count": null,
309
+ "outputs": []
 
 
 
 
 
 
 
 
310
  },
311
  {
312
  "cell_type": "code",
 
343
  ],
344
  "metadata": {
345
  "id": "QQOjh5BvnG8M",
346
+ "collapsed": true
 
 
 
 
347
  },
348
+ "execution_count": null,
349
+ "outputs": []
 
 
 
 
 
 
 
 
350
  },
351
  {
352
  "cell_type": "markdown",