codeShare committed on
Commit af5a290 · verified · 1 Parent(s): e6ba544

Upload sd_token_similarity_calculator.ipynb

Files changed (1):
  1. sd_token_similarity_calculator.ipynb +137 -4
sd_token_similarity_calculator.ipynb CHANGED
@@ -46,7 +46,8 @@
  "NUM_PREFIX = 13662\n",
  "NUM_SUFFIX = 32901\n",
  "\n",
- "loaded_Image_A = False\n",
+ "PREFIX_ENC_VOCAB = 'encoded_prefix_to_girl'\n",
+ "SUFFIX_ENC_VOCAB = 'encoded_suffix'\n",
  "\n",
  "#Import the vocab.json\n",
  "import json\n",
@@ -117,6 +118,22 @@
  "  return ' ' #<---- return whitespace if other id like emojis etc.\n",
  "#--------#\n",
  "\n",
+ "#get token from id (excluding tokens with special symbols)\n",
+ "def get_suffix(id):\n",
+ "  _id = f'{id}'\n",
+ "  if int(id) <= NUM_SUFFIX:\n",
+ "    return suffix[_id]\n",
+ "  return ' ' #<---- return whitespace if out of bounds\n",
+ "#--------#\n",
+ "\n",
+ "#get token from id (excluding tokens with special symbols)\n",
+ "def get_prefix(id):\n",
+ "  _id = f'{id}'\n",
+ "  if int(id) <= NUM_PREFIX:\n",
+ "    return prefix[_id]\n",
+ "  return ' ' #<---- return whitespace if out of bounds\n",
+ "#--------#\n",
+ "\n",
  "#print(get_token(35894))\n"
  ],
  "metadata": {
@@ -135,7 +152,7 @@
  "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
  "\n",
  "# @markdown Write name of token to match against\n",
- "token_name = \" banana\" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
+ "token_name = \"prs \" # @param {type:'string',\"placeholder\":\"leave empty for random value token\"}\n",
  "\n",
  "prompt = token_name\n",
  "# @markdown (optional) Mix the token with something else\n",
@@ -308,7 +325,7 @@
  "#Get image\n",
  "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
  "image_url = \"\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload (scroll down to see it)\"}\n",
- "colab_image_path = \"\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
+ "colab_image_path = \"imperial.png\" # @param {\"type\":\"string\",\"placeholder\": \"eval. as '/content/sd_tokens/' + **your input**\"}\n",
  "# @markdown --------------------------\n",
  "\n",
  "image_path = \"\"\n",
@@ -332,6 +349,8 @@
  "else:\n",
  "  image_A = Image.open(requests.get(image_url, stream=True).raw)\n",
  "#------#\n",
+ "from google.colab.patches import cv2_imshow\n",
+ "cv2_imshow(image_A)\n",
  "\n"
  ],
  "metadata": {
@@ -340,6 +359,89 @@
  "execution_count": null,
  "outputs": []
  },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title Order pre-made text_encodings by image similarity\n",
+ "from transformers import AutoTokenizer\n",
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
+ "from transformers import CLIPProcessor, CLIPModel\n",
+ "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = True)\n",
+ "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+ "\n",
+ "# Get image features\n",
+ "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
+ "image_features = model.get_image_features(**inputs)\n",
+ "image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)\n",
+ "name_A = \"the image\"\n",
+ "\n",
+ "# Load the .db file for prefix encodings\n",
+ "import shelve\n",
+ "d = shelve.open(PREFIX_ENC_VOCAB)\n",
+ "dots = torch.zeros(NUM_PREFIX)\n",
+ "for index in range(NUM_PREFIX):\n",
+ "  text_features = d[f'{index}']\n",
+ "  logit_scale = model.logit_scale.exp()\n",
+ "  #torch.matmul(text_features, image_features.t()) * logit_scale #<---- unused result\n",
+ "  sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+ "  dots[index] = sim\n",
+ "#----#\n",
+ "prefix_sorted, prefix_indices = torch.sort(dots, dim=0, descending=True)\n",
+ "d.close() #close the file\n",
+ "\n",
+ "# Load the .db file for suffix encodings\n",
+ "import shelve\n",
+ "d = shelve.open(SUFFIX_ENC_VOCAB)\n",
+ "dots = torch.zeros(NUM_SUFFIX)\n",
+ "for index in range(NUM_SUFFIX):\n",
+ "  text_features = d[f'{index}']\n",
+ "  logit_scale = model.logit_scale.exp()\n",
+ "  #torch.matmul(text_features, image_features.t()) * logit_scale #<---- unused result\n",
+ "  sim = torch.nn.functional.cosine_similarity(text_features, image_features) * logit_scale\n",
+ "  dots[index] = sim\n",
+ "#----#\n",
+ "suffix_sorted, suffix_indices = torch.sort(dots, dim=0, descending=True)\n",
+ "d.close() #close the file"
+ ],
+ "metadata": {
+ "id": "gaOB8rsOneIa"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title Show the 20 most similar suffix and prefix text-encodings to the image encoding\n",
+ "\n",
+ "_suffixes = '{'\n",
+ "for index in range(20):\n",
+ "  id = f'{suffix_indices[index]}'\n",
+ "  sim = suffix_sorted[index]\n",
+ "  name = get_suffix(id)\n",
+ "  _suffixes = _suffixes + name + '|'\n",
+ "#------#\n",
+ "_suffixes = (_suffixes + '}').replace('|}', '}')\n",
+ "print('most similar suffix tokens to image : ' + _suffixes)\n",
+ "\n",
+ "#-------#\n",
+ "\n",
+ "_prefixes = '{'\n",
+ "for index in range(20):\n",
+ "  id = f'{prefix_indices[index]}'\n",
+ "  sim = prefix_sorted[index]\n",
+ "  name = get_prefix(id)\n",
+ "  _prefixes = _prefixes + name + '|'\n",
+ "#------#\n",
+ "_prefixes = (_prefixes + '}').replace('|}', '}')\n",
+ "print('most similar prefix tokens to image : ' + _prefixes)\n"
+ ],
+ "metadata": {
+ "id": "eZqMUhP0qYaK"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
  {
  "cell_type": "code",
  "source": [
@@ -718,6 +820,35 @@
  "id": "hyK423TQCRup"
  }
  },
+ {
+ "cell_type": "code",
+ "source": [
+ "# @title Make your own text_encodings .db file for later use (rate is roughly 1K encodings per minute, so plan accordingly)\n",
+ "from transformers import AutoTokenizer\n",
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
+ "from transformers import CLIPProcessor, CLIPModel\n",
+ "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = True)\n",
+ "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+ "\n",
+ "# Save results as .db file\n",
+ "import shelve\n",
+ "d = shelve.open('my_text_encodings')\n",
+ "for index in range(NUM_PREFIX):\n",
+ "  inputs = tokenizer(text = get_prefix(index)+'girl ', padding=True, return_tensors=\"pt\")\n",
+ "  text_features = model.get_text_features(**inputs)\n",
+ "  d[f'{index}'] = text_features\n",
+ "#----#\n",
+ "\n",
+ "d.close() #close the file\n",
+ "\n",
+ ""
+ ],
+ "metadata": {
+ "id": "9ZiTsF9jV0TV"
+ },
+ "execution_count": 10,
+ "outputs": []
+ },
  {
  "cell_type": "markdown",
  "source": [
@@ -929,7 +1060,9 @@
  "\n",
  "//---//\n",
  "\n",
- "https://codeandlife.com/2023/01/26/mastering-the-huggingface-clip-model-how-to-extract-embeddings-and-calculate-similarity-for-text-and-images/"
+ "https://codeandlife.com/2023/01/26/mastering-the-huggingface-clip-model-how-to-extract-embeddings-and-calculate-similarity-for-text-and-images/\n",
+ "\n",
+ "https://arxiv.org/pdf/2303.03032"
  ],
  "metadata": {
  "id": "njeJx_nSSA8H"