codeShare committed · Commit 7959015 · verified · 1 Parent(s): 1c2d1a2

Upload sd_token_similarity_calculator.ipynb

Files changed (1):
  1. sd_token_similarity_calculator.ipynb +49 -63
sd_token_similarity_calculator.ipynb CHANGED
@@ -274,19 +274,42 @@
   "id": "IUCuV9RtQpBn"
   }
  },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title ⚡💾 Save results as .db file\n",
+ "import shelve\n",
+ "d = shelve.open('tokens_most_similiar_to_' + name_A.replace('</w>','').strip())\n",
+ "#NUM TOKENS == 49407\n",
+ "for index in range(NUM_TOKENS):\n",
+ " #print(d[f'{index}']) #<-----Use this to read values from the .db file\n",
+ " d[f'{index}']= vocab[indices[index].item()] #<---- write values to .db file\n",
+ "#----#\n",
+ "d.close() #close the file\n",
+ "# See this link for additional stuff to do with shelve: https://docs.python.org/3/library/shelve.html"
+ ],
+ "metadata": {
+ "id": "qj888fPEbX8K"
+ },
+ "execution_count": 15,
+ "outputs": []
+ },
  {
  "cell_type": "code",
  "source": [
  "# @title 🪐🖼️ -> 📝 Token-Sampling Image interrogator\n",
- "\n",
+ "VOCAB_FILENAME = 'tokens_most_similiar_to_girl' #This vocab has been ordered where lowest index has the highest similarity to the reference vector \"girl</w>\". Feel free to create your own .db around a target token in above cells.\n",
+ "#-----#\n",
+ "import shelve\n",
+ "db_vocab = shelve.open(VOCAB_FILENAME)\n",
  "# @markdown # What do you want to mimic?\n",
- "use = '🖼️image_encoding from image' # @param ['📝text_encoding from prompt', '🖼️image_encoding from image']\n",
+ "use = '📝text_encoding from prompt' # @param ['📝text_encoding from prompt', '🖼️image_encoding from image']\n",
  "# @markdown --------------------------\n",
  "use_token_padding = True # param {type:\"boolean\"} <---- Enabled by default\n",
  "prompt = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
- "\n",
+ "#-----#\n",
  "prompt_A = prompt\n",
- "\n",
+ "#-----#\n",
  "from google.colab import files\n",
  "def upload_files():\n",
  " from google.colab import files\n",
@@ -297,17 +320,12 @@
  "#Get image\n",
  "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
  "image_url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
- "\n",
- "\n",
  "colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
- "\n",
  "# @markdown --------------------------\n",
  "from PIL import Image\n",
  "import requests\n",
  "image_A = \"\"\n",
- "\n",
  "#----#\n",
- "\n",
  "if(use == '🖼️image_encoding from image'):\n",
  " if image_url == \"\":\n",
  " import cv2\n",
@@ -323,14 +341,12 @@
  " else:\n",
  " image_A = Image.open(requests.get(image_url, stream=True).raw)\n",
  "#------#\n",
- "\n",
  "from transformers import AutoTokenizer\n",
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
  "from transformers import CLIPProcessor, CLIPModel\n",
  "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
  "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
- "\n",
- "\n",
+ "#-----#\n",
  "if(use == '🖼️image_encoding from image'):\n",
  " # Get image features\n",
  " inputs = processor(images=image_A, return_tensors=\"pt\")\n",
@@ -338,34 +354,27 @@
  " image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
  " name_A = \"the image\"\n",
  "#-----#\n",
- "\n",
- "\n",
  "if(use == '📝text_encoding from prompt'):\n",
  " # Get text features\n",
  " inputs = tokenizer(text = prompt, padding=True, return_tensors=\"pt\")\n",
  " text_features_A = model.get_text_features(**inputs)\n",
  " name_A = prompt\n",
  "#-----#\n",
- "\n",
- "\n",
  "# @markdown # The output...\n",
  "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "must_contain = \"banana \" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
  "token_B = must_contain\n",
- "\n",
  "# @markdown -----\n",
- "\n",
  "# @markdown # Use a range of tokens from the vocab.json (slow method)\n",
- "start_search_at_ID = 27700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
+ "start_search_at_index = 1700 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
+ "# @markdown The lower the start_index, the more similar the sampled tokens will be to the reference token \"girl\\</w>\"\n",
+ "start_search_at_ID = start_search_at_index\n",
  "search_range = 100 # @param {type:\"slider\", min:100, max: 2000, step:0}\n",
  "restrictions = 'None' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
- "\n",
  "#markdown Limit char size of included token <----- Disabled\n",
  "min_char_size = 0 #param {type:\"slider\", min:0, max: 20, step:1}\n",
  "char_range = 50 #param {type:\"slider\", min:0, max: 20, step:1}\n",
- "\n",
- "\n",
  "# markdown # ...or paste prompt items\n",
  "# markdown Format must be {item1|item2|...}. You can acquire prompt items using the Randomizer in the fusion gen: https://perchance.org/fusion-ai-image-generator\n",
  "_enable = False # param {\"type\":\"boolean\"}\n",
@@ -373,26 +382,21 @@
  "#-----#\n",
  "name_B = must_contain\n",
  "#-----#\n",
- "\n",
  "START = start_search_at_ID\n",
  "RANGE = min(search_range , 49407 - start_search_at_ID)\n",
- "\n",
+ "#-----#\n",
  "dots = torch.zeros(RANGE)\n",
  "is_BC = torch.zeros(RANGE)\n",
- "\n",
  "import re\n",
- "\n",
+ "#-----#\n",
  "for index in range(RANGE):\n",
  " id_C = START + index\n",
- " name_C = vocab[id_C]\n",
+ " name_C = db_vocab[f'{id_C}']\n",
  " is_Prefix = 0\n",
- "\n",
- "\n",
  " #Skip if non-AZ characters are found\n",
  " if re.search(\"\\W\" , name_C.replace('</w>', '')):\n",
  " continue\n",
- "\n",
- "\n",
+ " #-----#\n",
  " # Decide if we should process prefix/suffix tokens\n",
  " if name_C.find('</w>')<=-1:\n",
  " is_Prefix = 1\n",
@@ -402,7 +406,6 @@
  " if restrictions == \"Prefix only\":\n",
  " continue\n",
  " #-----#\n",
- "\n",
  " # Decide if char-size is within range\n",
  " if len(name_C) < min_char_size:\n",
  " continue\n",
@@ -413,7 +416,6 @@
  " if is_Prefix>0:\n",
  " name_CB = must_start_with + ' ' + name_C.strip() + '-' + name_B.strip() + ' ' + must_end_with\n",
  " #-----#\n",
- "\n",
  " if(use == '🖼️image_encoding from image'):\n",
  " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
  " text_features = model.get_text_features(**ids_CB)\n",
@@ -422,16 +424,12 @@
  " torch.matmul(text_features, image_features.t()) * logit_scale\n",
  " sim_CB = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
  " #-----#\n",
- "\n",
  " if(use == '📝text_encoding from prompt'):\n",
  " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
  " text_features = model.get_text_features(**ids_CB)\n",
  " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
  " sim_CB = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
  " #-----#\n",
- "\n",
- "\n",
- "\n",
  " #-----#\n",
  " if restrictions == \"Prefix only\":\n",
  " result = sim_CB\n",
@@ -439,7 +437,6 @@
  " dots[index] = result\n",
  " continue\n",
  " #-----#\n",
- "\n",
  " if(use == '🖼️image_encoding from image'):\n",
  " name_BC = must_start_with + name_B + name_C + must_end_with\n",
  " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
@@ -449,7 +446,6 @@
  " torch.matmul(text_features, image_features.t()) * logit_scale\n",
  " sim_BC = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
  " #-----#\n",
- "\n",
  " if(use == '📝text_encoding from prompt'):\n",
  " name_BC = must_start_with + name_B + name_C + must_end_with\n",
  " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
@@ -457,20 +453,16 @@
  " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
  " sim_BC = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
  " #-----#\n",
- "\n",
  " result = sim_CB\n",
  " if(sim_BC > sim_CB):\n",
  " is_BC[index] = 1\n",
  " result = sim_BC\n",
- "\n",
+ " #-----#\n",
  " #result = absolute_value(result.item())\n",
  " result = result.item()\n",
  " dots[index] = result\n",
  "#----#\n",
- "\n",
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
- "\n",
- "\n",
  "# @markdown ----------\n",
  "# @markdown # Print options\n",
  "list_size = 100 # @param {type:'number'}\n",
@@ -478,11 +470,10 @@
  "print_Similarity = True # @param {type:\"boolean\"}\n",
  "print_Name = True # @param {type:\"boolean\"}\n",
  "print_Divider = True # @param {type:\"boolean\"}\n",
- "\n",
- "\n",
+ "#----#\n",
  "if (print_Divider):\n",
  " print('//---//')\n",
- "\n",
+ "#----#\n",
  "print('')\n",
  "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for {prompt_A} : ')\n",
  "print('')\n",
@@ -499,7 +490,7 @@
  "#----#\n",
  "for index in range(min(list_size,RANGE)):\n",
  " id = START + indices[index].item()\n",
- " name = vocab[id]\n",
+ " name = db_vocab[f'{id}']\n",
  " #-----#\n",
  " if (name.find('</w>')<=-1):\n",
  " name = name + '-'\n",
@@ -511,7 +502,7 @@
  " aheads = aheads + name + \"|\"\n",
  " #----#\n",
  " sim = sorted[index].item()\n",
- "\n",
+ " #----#\n",
  " if(is_BC[index]>0):\n",
  " if sim>max_sim_ahead:\n",
  " max_sim_ahead = sim\n",
@@ -520,7 +511,6 @@
  " if sim>max_sim_trail:\n",
  " max_sim_trail = sim\n",
  " max_name_trail = name\n",
- "\n",
  "#------#\n",
  "trails = (trails + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
  "aheads = (aheads + \"&&&&\").replace(\"|&&&&\", \"}\").replace(\"</w>\", \" \").replace(\"{&&&&\", \"\")\n",
@@ -537,10 +527,9 @@
  "#-----#\n",
  "#STEP 2\n",
  "import random\n",
- "\n",
  "names = {}\n",
- "\n",
- "NUM_PERMUTATIONS = 4 # 0 1 2 3\n",
+ "NUM_PERMUTATIONS = 4\n",
+ "#-----#\n",
  "dots = torch.zeros(NUM_PERMUTATIONS)\n",
  "for index in range(NUM_PERMUTATIONS):\n",
  " name = must_start_with\n",
@@ -551,7 +540,7 @@
  " name = name + must_end_with\n",
  " #----#\n",
  " ids = processor.tokenizer(text=name, padding=use_token_padding, return_tensors=\"pt\")\n",
- "\n",
+ " #----#\n",
  " if(use == '🖼️image_encoding from image'):\n",
  " text_features = model.get_text_features(**ids)\n",
  " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
@@ -559,26 +548,22 @@
  " torch.matmul(text_features, image_features.t()) * logit_scale\n",
  " sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
  " #-----#\n",
- "\n",
  " if(use == '📝text_encoding from prompt'):\n",
  " text_features = model.get_text_features(**ids)\n",
  " text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)\n",
  " sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
  " #-----#\n",
- "\n",
- "\n",
  " dots[index] = sim\n",
  " names[index] = name\n",
- "\n",
- "\n",
  "#------#\n",
- "\n",
  "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
- "\n",
+ "#------#\n",
  "for index in range(NUM_PERMUTATIONS):\n",
  " print(names[indices[index].item()])\n",
  " print(f'similarity = {round(sorted[index].item(),2)} %')\n",
- " print('------')"
+ " print('------')\n",
+ "#------#\n",
+ "db_vocab.close() #close the file"
  ],
  "metadata": {
  "collapsed": true,
@@ -620,7 +605,8 @@
  ],
  "metadata": {
  "id": "QQOjh5BvnG8M",
- "collapsed": true
+ "collapsed": true,
+ "cellView": "form"
  },
  "execution_count": null,
  "outputs": []
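Both branches of the interrogator score candidates the same way: encode with CLIP, L2-normalize, then take the cosine similarity against the reference encoding. A self-contained sketch of that comparison, using the same openai/clip-vit-large-patch14 checkpoint and COCO test image as the notebook (the prompt text is illustrative):

```python
# Sketch: cosine similarity between a CLIP text encoding and an image encoding,
# mirroring how the interrogator cells compare candidates to the reference.
import torch
import requests
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

image = Image.open(requests.get(
    "http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)

with torch.no_grad():
    inputs = processor(images=image, return_tensors="pt")
    image_features = model.get_image_features(**inputs)
    ids = processor.tokenizer(text="photo of a banana", padding=True,
                              return_tensors="pt")
    text_features = model.get_text_features(**ids)

# L2-normalize, then cosine similarity (the notebook normalizes the same way)
image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)
sim = torch.nn.functional.cosine_similarity(text_features, image_features)
print(f"similarity = {round(sim.item(), 2)}")
```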