codeShare committed on
Commit 213eb4a · verified · 1 Parent(s): 4e7197b

Upload sd_token_similarity_calculator.ipynb

Files changed (1):
  1. sd_token_similarity_calculator.ipynb +138 -33
sd_token_similarity_calculator.ipynb CHANGED
@@ -46,8 +46,18 @@
   "NUM_PREFIX = 13662\n",
   "NUM_SUFFIX = 32901\n",
   "\n",
- "PREFIX_ENC_VOCAB = 'encoded_prefix_to_girl'\n",
- "SUFFIX_ENC_VOCAB = 'encoded_suffix'\n",
+ "PREFIX_ENC_VOCAB = ['encoded_prefix_to_girl',]\n",
+ "SUFFIX_ENC_VOCAB = [\n",
+ " 'from_-encoded_suffix',\n",
+ " 'a_-_encoded_suffix' ,\n",
+ " 'by_-encoded_suffix' ,\n",
+ " 'encoded_suffix-_like']\n",
+ "\n",
+ "# Make sure these match above results\n",
+ "NUM_PREFIX_LISTS = 1\n",
+ "NUM_SUFFIX_LISTS = 4\n",
+ "#-----#\n",
+ "\n",
   "\n",
   "#Import the vocab.json\n",
   "import json\n",
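The vocab names now live in lists of shelve `.db` file names, with `NUM_PREFIX_LISTS` / `NUM_SUFFIX_LISTS` kept in sync by hand ("Make sure these match above results"). A minimal sketch of deriving the counts from the lists themselves, so the two cannot drift apart (not part of the commit):

```python
# Sketch: derive the list counts instead of hardcoding them.
PREFIX_ENC_VOCAB = ['encoded_prefix_to_girl']
SUFFIX_ENC_VOCAB = [
    'from_-encoded_suffix',
    'a_-_encoded_suffix',
    'by_-encoded_suffix',
    'encoded_suffix-_like',
]

NUM_PREFIX_LISTS = len(PREFIX_ENC_VOCAB)  # 1
NUM_SUFFIX_LISTS = len(SUFFIX_ENC_VOCAB)  # 4
```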
@@ -134,33 +144,21 @@
   " return ' ' #<---- return whitespace if out of bounds\n",
   "#--------#\n",
   "\n",
+ "\n",
+ "def _modulus(_id,id_max):\n",
+ " id = _id\n",
+ " while(id>id_max):\n",
+ "  id = id-id_max\n",
+ " return id\n",
+ "\n",
   "#print(get_token(35894))\n"
   ],
   "metadata": {
   "id": "Ch9puvwKH1s3",
- "collapsed": true,
- "colab": {
- "base_uri": "https://localhost:8080/"
- },
- "outputId": "2333a33b-1344-4a14-bee6-060d98167715"
+ "collapsed": true
   },
- "execution_count": 1,
- "outputs": [
- {
- "output_type": "stream",
- "name": "stdout",
- "text": [
- "Cloning into 'sd_tokens'...\n",
- "remote: Enumerating objects: 72, done.\u001b[K\n",
- "remote: Counting objects: 100% (69/69), done.\u001b[K\n",
- "remote: Compressing objects: 100% (69/69), done.\u001b[K\n",
- "remote: Total 72 (delta 24), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
- "Unpacking objects: 100% (72/72), 1.34 MiB | 1.65 MiB/s, done.\n",
- "Filtering content: 100% (10/10), 899.76 MiB | 50.12 MiB/s, done.\n",
- "/content/sd_tokens\n"
- ]
- }
- ]
+ "execution_count": null,
+ "outputs": []
   },
   {
   "cell_type": "code",
@@ -316,18 +314,125 @@
   "outputs": []
   },
   {
- "cell_type": "markdown",
+ "cell_type": "code",
   "source": [
- "Below image interrogator appends CLIP tokens to either end of the 'must_contain' text , and seeks to maximize similarity with the image encoding.\n",
+ "# @title 📝 Prompt similarity: Order pre-made text_encodings\n",
+ "prompt = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
+ "from transformers import AutoTokenizer\n",
+ "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
+ "from transformers import CLIPProcessor, CLIPModel\n",
+ "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
+ "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
   "\n",
- "It takes a long while to check all the tokens (too long!) so this cell only samples a range of the 49K available tokens.\n",
+ "# Get text features for user input\n",
+ "inputs = tokenizer(text = prompt, padding=True, return_tensors=\"pt\")\n",
+ "text_features_A = model.get_text_features(**inputs)\n",
+ "text_features_A = text_features_A/text_features_A.norm(p=2, dim=-1, keepdim=True)\n",
+ "name_A = prompt\n",
+ "#------#\n",
   "\n",
- "You can run this cell, then paste the result into the 'must_contain' box , and then run the cell again.\n",
+ "# Load the .db files for prefix encodings\n",
+ "import shelve\n",
+ "_iters = -1\n",
+ "RANGE = NUM_PREFIX\n",
+ "NUM_PREFIX_LISTS = 1\n",
+ "dots = results_sim = torch.zeros(RANGE*NUM_PREFIX_LISTS)\n",
+ "for _PREFIX_ENC_VOCAB in PREFIX_ENC_VOCAB:\n",
+ " _iters = _iters + 1\n",
+ " d = shelve.open(_PREFIX_ENC_VOCAB)\n",
+ " for _index in range(RANGE):\n",
+ "  index = _iters*RANGE + _index\n",
+ "  text_features = d[f'{_index}']\n",
+ "  text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ "  sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
+ "  dots[index] = sim\n",
+ " #----#\n",
+ " d.close() #close the file\n",
+ "#------#\n",
+ "prefix_sorted, prefix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+ "#------#\n",
   "\n",
- "Check the sd_tokens folder for stored .db files from running the '⚡ Get similiar tokens' cell. These can be used in the ⚡+🖼️ -> 📝 Token-Sampling Image interrogator cell\n"
+ "# Load the .db files for suffix encodings\n",
+ "import shelve\n",
+ "_iters = -1\n",
+ "RANGE = NUM_SUFFIX\n",
+ "dots = results_sim = torch.zeros(RANGE*NUM_SUFFIX_LISTS)\n",
+ "for _SUFFIX_ENC_VOCAB in SUFFIX_ENC_VOCAB:\n",
+ " _iters = _iters + 1\n",
+ " d = shelve.open(_SUFFIX_ENC_VOCAB)\n",
+ " for _index in range(RANGE):\n",
+ "  index = _iters*RANGE + _index\n",
+ "  text_features = d[f'{_index}']\n",
+ "  text_features = text_features/text_features.norm(p=2, dim=-1, keepdim=True)\n",
+ "  sim = torch.nn.functional.cosine_similarity(text_features, text_features_A)\n",
+ "  dots[index] = sim\n",
+ " #----#\n",
+ " d.close() #close the file\n",
+ "#------#\n",
+ "suffix_sorted, suffix_indices = torch.sort(dots,dim=0 , descending=True)\n",
+ "#------#\n",
+ "\n",
+ "#Print the results\n",
+ "#'from_-encoded_suffix',\n",
+ "#'a_-_encoded_suffix' ,\n",
+ "#'by_-encoded_suffix' ,\n",
+ "#'encoded_suffix-_like'\n",
+ "\n",
+ "# title Show the 100 most similar suffix and prefix text-encodings to the text encoding\n",
+ "RANGE = 100\n",
+ "_suffixes = '{'\n",
+ "_sims = '{'\n",
+ "for index in range(RANGE):\n",
+ " id = int(suffix_indices[index])\n",
+ " ahead = \"from \"\n",
+ " behind = \"\"\n",
+ " if(id>NUM_SUFFIX*1):\n",
+ "  ahead = \"a \"\n",
+ " if(id>NUM_SUFFIX*2):\n",
+ "  ahead = \"by \"\n",
+ " if(id>NUM_SUFFIX*3):\n",
+ "  ahead = \"\"\n",
+ "  behind = \"like\"\n",
+ " id = _modulus(id,NUM_SUFFIX)\n",
+ " #------#\n",
+ " sim = suffix_sorted[index].item()\n",
+ " name = ahead + get_suffix(id) + behind\n",
+ " if(get_suffix(id) == ' '): name = ahead + f'{id}' + behind\n",
+ " _suffixes = _suffixes + name + '|'\n",
+ " _sims = _sims + f'{round(sim*100,2)} %' + '|'\n",
+ "#------#\n",
+ "_suffixes = (_suffixes + '}').replace('|}', '}')\n",
+ "_sims = (_sims + '}').replace('|}', '}')\n",
+ "\n",
+ "print('most similar suffix items to prompt : ' + _suffixes)\n",
+ "print('similarity % for suffix items : ' + _sims)\n",
+ "print('')\n",
+ "\n",
+ "#-------#\n",
+ "\n",
+ "_prefixes = '{'\n",
+ "for index in range(RANGE):\n",
+ " id = f'{prefix_indices[index]}'\n",
+ " #sim = prefix_sorted[index]\n",
+ " name = get_prefix(id)\n",
+ " _prefixes = _prefixes + name + '|'\n",
+ "#------#\n",
+ "_prefixes = (_prefixes + '}').replace('|}', '}')\n",
+ "print('most similar prefix items to prompt : ' + _prefixes)\n"
+ ],
+ "metadata": {
+ "id": "xc-PbIYF428y"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Below are the Image interrogators"
   ],
   "metadata": {
- "id": "IUCuV9RtQpBn"
+ "id": "qZvLkJCtGC89"
   }
   },
   {
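The new cell ranks every stored encoding against the prompt encoding: each vector is L2-normalized, scored with `torch.nn.functional.cosine_similarity`, and the scores are sorted descending. Since both sides are unit-norm by that point, cosine similarity reduces to a dot product, so the per-item loop collapses to a single matrix product once the encodings are stacked. A sketch of that equivalent formulation (the `stacked` tensor is a stand-in for the shelve contents, not part of the commit):

```python
import torch

# Assume `stacked` is an (N, D) tensor of the pre-made text encodings
# read out of the shelve .db files, and `text_features_A` is the (1, D)
# prompt encoding from model.get_text_features.
stacked = torch.randn(10, 8)          # stand-in for the stored encodings
text_features_A = torch.randn(1, 8)   # stand-in for the prompt encoding

# L2-normalize both sides, exactly as the cell does per item
stacked = stacked / stacked.norm(p=2, dim=-1, keepdim=True)
text_features_A = text_features_A / text_features_A.norm(p=2, dim=-1, keepdim=True)

# On unit vectors, cosine similarity is a dot product, so one matmul
# replaces the inner loop over d[f'{_index}']
dots = (stacked @ text_features_A.T).squeeze(-1)  # shape (N,)

sorted_sims, indices = torch.sort(dots, dim=0, descending=True)
```

One small note on the committed loop: `dots = results_sim = torch.zeros(...)` binds both names to the same tensor; only `dots` is read afterwards.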
@@ -397,7 +502,7 @@
   {
   "cell_type": "code",
   "source": [
- "# @title Order pre-made text_encodings to image similarity\n",
+ "# @title 🖼️ Image similarity : Order pre-made text_encodings\n",
   "from transformers import AutoTokenizer\n",
   "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
   "from transformers import CLIPProcessor, CLIPModel\n",
@@ -447,7 +552,7 @@
   {
   "cell_type": "code",
   "source": [
- "# @title Show the 10 most similiar suffix and prefix text-encodings to the image encoding\n",
+ "# @title 🖼️ Show the 10 most similar suffix and prefix text-encodings to the image encoding\n",
   "\n",
   "_suffixes = '{'\n",
   "for index in range(20):\n",
@@ -895,7 +1000,7 @@
   "metadata": {
   "id": "9ZiTsF9jV0TV"
   },
- "execution_count": 6,
+ "execution_count": null,
   "outputs": []
   },
   {