Upload sd_token_similarity_calculator.ipynb

sd_token_similarity_calculator.ipynb  CHANGED  (+500 -28)
|
@@ -28,7 +28,7 @@
| 28 |   {
| 29 |   "cell_type": "code",
| 30 |   "source": [
| 31 | - "# @title Load/initialize values\n",
| 32 |   "# Load the tokens into the colab\n",
| 33 |   "!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n",
| 34 |   "import torch\n",
|
|
@@ -116,23 +116,10 @@
| 116 |   "metadata": {
| 117 |   "id": "Ch9puvwKH1s3",
| 118 |   "collapsed": true,
| 119 | - "cellView": "form"
| 120 | - "outputId": "9a9d4274-a633-464b-e1fb-06a33f3dd873",
| 121 | - "colab": {
| 122 | - "base_uri": "https://localhost:8080/"
| 123 | - }
| 124 |   },
| 125 | - "execution_count":
| 126 | - "outputs": [
| 127 | - {
| 128 | - "output_type": "stream",
| 129 | - "name": "stdout",
| 130 | - "text": [
| 131 | - "fatal: destination path 'sd_tokens' already exists and is not an empty directory.\n",
| 132 | - "/content/sd_tokens\n"
| 133 | - ]
| 134 | - }
| 135 | - ]
| 136 |   },
| 137 |   {
| 138 |   "cell_type": "code",
|
|
@@ -278,7 +265,8 @@
| 278 |   "#Print the sorted list from above result"
| 279 |   ],
| 280 |   "metadata": {
| 281 | - "id": "iWeFnT1gAx6A"
| 282 |   },
| 283 |   "execution_count": null,
| 284 |   "outputs": []
|
|
@@ -315,7 +303,8 @@
| 315 |   ],
| 316 |   "metadata": {
| 317 |   "id": "QQOjh5BvnG8M",
| 318 | - "collapsed": true
| 319 |   },
| 320 |   "execution_count": null,
| 321 |   "outputs": []
|
|
@@ -323,14 +312,497 @@
| 323 |   {
| 324 |   "cell_type": "code",
| 325 |   "source": [
| 326 | - "# @title
| 327 | - "
| 328 |   "prompt_A = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
| 329 |   "# @markdown Set conditions for the output\n",
| 330 | - "must_start_with = \"
| 331 |   "must_contain = \"yellow\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
| 332 | - "must_end_with = \"
| 333 | - "\n",
| 334 |   "token_B = must_contain\n",
| 335 |   "\n",
| 336 |   "# @markdown Limit the search\n",
|
|
@@ -343,7 +815,6 @@
| 343 |   "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
| 344 |   "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
| 345 |   "\n",
| 346 | - "\n",
| 347 |   "#Tokenize input B\n",
| 348 |   "from transformers import AutoTokenizer\n",
| 349 |   "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
|
|
@@ -427,8 +898,6 @@
| 427 |   " dots[index] = result\n",
| 428 |   "#----#\n",
| 429 |   "\n",
| 430 | - "\n",
| 431 | - "\n",
| 432 |   "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
| 433 |   "\n",
| 434 |   "# @markdown Print options\n",
|
|
@@ -464,6 +933,7 @@
| 464 |   " print('--------')"
| 465 |   ],
| 466 |   "metadata": {
| 467 |   "id": "uDtcm-l8UCJk"
| 468 |   },
| 469 |   "execution_count": null,
|
|
@@ -901,7 +1371,9 @@
| 901 |   "\n",
| 902 |   "There might be some updates in the future with features not mentioned here.\n",
| 903 |   "\n",
| 904 | - "
| 905 |   ],
| 906 |   "metadata": {
| 907 |   "id": "njeJx_nSSA8H"
|
|
|
|
| 28 |   {
| 29 |   "cell_type": "code",
| 30 |   "source": [
| 31 | + "# @title ✳️ Load/initialize values\n",
| 32 |   "# Load the tokens into the colab\n",
| 33 |   "!git clone https://huggingface.co/datasets/codeShare/sd_tokens\n",
| 34 |   "import torch\n",
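This is the updated side of the same load cell, and the hunk again only shows its first lines. The search cells further down index into a `token` tensor (one CLIP embedding per token id) and a `vocab` id-to-string lookup, which this cell is presumably responsible for initializing from the cloned sd_tokens repo. A rough sketch of that setup, assuming the embeddings ship as a single .pt tensor; the file name below is a hypothetical placeholder, not something confirmed by the diff:

# Sketch only: 'sd15_token_embeddings.pt' is a made-up name; check the cloned
# sd_tokens repo for the actual file(s) it provides.
import torch
from transformers import AutoTokenizer

token = torch.load('/content/sd_tokens/sd15_token_embeddings.pt')  # e.g. shape [49408, 768]

tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")
vocab = {i: s for s, i in tokenizer.get_vocab().items()}  # id -> token string, e.g. 'yellow</w>'

print(token.shape)
print(vocab[49406], vocab[49407])  # '<|startoftext|>', '<|endoftext|>'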
| 116 |   "metadata": {
| 117 |   "id": "Ch9puvwKH1s3",
| 118 |   "collapsed": true,
| 119 | + "cellView": "form"
| 120 |   },
| 121 | + "execution_count": null,
| 122 | + "outputs": []
| 123 |   },
| 124 |   {
| 125 |   "cell_type": "code",
|
|
|
|
| 265 |   "#Print the sorted list from above result"
| 266 |   ],
| 267 |   "metadata": {
| 268 | + "id": "iWeFnT1gAx6A",
| 269 | + "cellView": "form"
| 270 |   },
| 271 |   "execution_count": null,
| 272 |   "outputs": []
|
|
|
|
| 303 |   ],
| 304 |   "metadata": {
| 305 |   "id": "QQOjh5BvnG8M",
| 306 | + "collapsed": true,
| 307 | + "cellView": "form"
| 308 |   },
| 309 |   "execution_count": null,
| 310 |   "outputs": []
|
|
|
|
| 312 |   {
| 313 |   "cell_type": "code",
| 314 |   "source": [
| 315 | + "# @title 🪐🖼️ -> 📝 Image to prompt : Add single token to existing prompt to match image\n",
| 316 | + "from google.colab import files\n",
| 317 | + "def getLocalFiles():\n",
| 318 | + " _files = files.upload()\n",
| 319 | + " if len(_files) >0:\n",
| 320 | + " for k,v in _files.items():\n",
| 321 | + " open(k,'wb').write(v)\n",
| 322 | + "\n",
| 323 | + "#Get image\n",
| 324 | + "# You can use \"http://images.cocodataset.org/val2017/000000039769.jpg\" for testing\n",
| 325 | + "url = \"http://images.cocodataset.org/val2017/000000039769.jpg\" # @param {\"type\":\"string\",\"placeholder\":\"leave empty for local upload\"}\n",
| 326 | + "from PIL import Image\n",
| 327 | + "import requests\n",
| 328 | + "if url == \"\":\n",
| 329 | + " image_A = getLocalFiles()\n",
| 330 | + "else:\n",
| 331 | + " image_A = Image.open(requests.get(url, stream=True).raw)\n",
| 332 | + "\n",
| 333 | + "\n",
| 334 | + "# Get image features\n",
| 335 | + "from transformers import CLIPProcessor, CLIPModel\n",
| 336 | + "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
| 337 | + "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
| 338 | + "inputs = processor(images=image_A, return_tensors=\"pt\")\n",
| 339 | + "image_features = model.get_image_features(**inputs)\n",
| 340 | + "text_encoding_A = image_features\n",
| 341 | + "A = text_encoding_A[0]\n",
| 342 | + "_A = LA.vector_norm(A, ord=2)\n",
| 343 | + "prompt_A = \"the image\"\n",
| 344 | + "name_A = prompt_A\n",
| 345 | + "#-----#\n",
| 346 | + "\n",
| 347 | + "# @markdown Set conditions for the output\n",
| 348 | + "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
| 349 | + "must_contain = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
| 350 | + "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
| 351 | + "token_B = must_contain\n",
| 352 | + "\n",
| 353 | + "# @markdown Limit the search\n",
| 354 | + "use_token_padding = True # @param {type:\"boolean\"}\n",
| 355 | + "start_search_at_ID = 12500 # @param {type:\"slider\", min:0, max: 49407, step:100}\n",
| 356 | + "search_range = 500 # @param {type:\"slider\", min:0, max: 2000, step:100}\n",
| 357 | + "restrictions = 'Suffix only' # @param [\"None\", \"Suffix only\", \"Prefix only\"]\n",
| 358 | + "\n",
| 359 | + "# @markdown Limit char size of included token\n",
| 360 | + "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
| 361 | + "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
| 362 | + "\n",
| 363 | + "#Tokenize input B\n",
| 364 | + "from transformers import AutoTokenizer\n",
| 365 | + "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
| 366 | + "tokenizer_output = tokenizer(text = token_B)\n",
| 367 | + "input_ids = tokenizer_output['input_ids']\n",
| 368 | + "#-----#\n",
| 369 | + "name_B = must_contain\n",
| 370 | + "#-----#\n",
| 371 | + "\n",
| 372 | + "START = start_search_at_ID\n",
| 373 | + "RANGE = min(search_range , 49407 - start_search_at_ID)\n",
| 374 | + "\n",
| 375 | + "dots = torch.zeros(RANGE)\n",
| 376 | + "is_BC = torch.zeros(RANGE)\n",
| 377 | + "for index in range(RANGE):\n",
| 378 | + " id_C = START + index\n",
| 379 | + " C = token[id_C]\n",
| 380 | + " _C = LA.vector_norm(C, ord=2)\n",
| 381 | + " name_C = vocab[id_C]\n",
| 382 | + "\n",
| 383 | + " # Decide if we should process prefix/suffix tokens\n",
| 384 | + " if name_C.find('</w>')<=-1:\n",
| 385 | + " if restrictions != \"Prefix only\":\n",
| 386 | + " continue\n",
| 387 | + " else:\n",
| 388 | + " if restrictions == \"Prefix only\":\n",
| 389 | + " continue\n",
| 390 | + " #-----#\n",
| 391 | + "\n",
| 392 | + " # Decide if char-size is within range\n",
| 393 | + " if len(name_C) < min_char_size:\n",
| 394 | + " continue\n",
| 395 | + " if len(name_C) > min_char_size + char_range:\n",
| 396 | + " continue\n",
| 397 | + " #-----#\n",
| 398 | + "\n",
| 399 | + " name_CB = must_start_with + name_C + name_B + must_end_with\n",
| 400 | + " if restrictions == \"Prefix only\":\n",
| 401 | + " name_CB = must_start_with + name_C + '-' + name_B + must_end_with\n",
| 402 | + " #-----#\n",
| 403 | + " ids_CB = processor.tokenizer(text=name_CB, padding=use_token_padding, return_tensors=\"pt\")\n",
| 404 | + " text_encoding_CB = model.get_text_features(**ids_CB)\n",
| 405 | + " CB = text_encoding_CB[0]\n",
| 406 | + " _CB = LA.vector_norm(CB, ord=2)\n",
| 407 | + " sim_CB = torch.dot(A,CB)/(_A*_CB)\n",
| 408 | + " #-----#\n",
| 409 | + " if restrictions == \"Prefix only\":\n",
| 410 | + " result = sim_CB\n",
| 411 | + " result = result.item()\n",
| 412 | + " dots[index] = result\n",
| 413 | + " continue\n",
| 414 | + " #-----#\n",
| 415 | + " name_BC = must_start_with + name_B + name_C + must_end_with\n",
| 416 | + " ids_BC = processor.tokenizer(text=name_BC, padding=use_token_padding, return_tensors=\"pt\")\n",
| 417 | + " text_encoding_BC = model.get_text_features(**ids_BC)\n",
| 418 | + " BC = text_encoding_BC[0]\n",
| 419 | + " _BC = LA.vector_norm(BC, ord=2)\n",
| 420 | + " sim_BC = torch.dot(A,BC)/(_A*_BC)\n",
| 421 | + " #-----#\n",
| 422 | + "\n",
| 423 | + " result = sim_CB\n",
| 424 | + " if(sim_BC > sim_CB):\n",
| 425 | + " is_BC[index] = 1\n",
| 426 | + " result = sim_BC\n",
| 427 | + "\n",
| 428 | + " #result = absolute_value(result.item())\n",
| 429 | + " result = result.item()\n",
| 430 | + " dots[index] = result\n",
| 431 | + "#----#\n",
| 432 | + "\n",
| 433 | + "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
| 434 | + "\n",
| 435 | + "# @markdown Print options\n",
| 436 | + "list_size = 100 # @param {type:'number'}\n",
| 437 | + "print_ID = False # @param {type:\"boolean\"}\n",
| 438 | + "print_Similarity = True # @param {type:\"boolean\"}\n",
| 439 | + "print_Name = True # @param {type:\"boolean\"}\n",
| 440 | + "print_Divider = True # @param {type:\"boolean\"}\n",
| 441 | + "\n",
| 442 | + "\n",
| 443 | + "if (print_Divider):\n",
| 444 | + " print('//---//')\n",
| 445 | + "\n",
| 446 | + "print('')\n",
| 447 | + "print(f'These token pairings within the range ID = {START} to ID = {START + RANGE} most closely match the text_encoding for {prompt_A} : ')\n",
| 448 | + "print('')\n",
| 449 | + "\n",
| 450 | + "for index in range(min(list_size,RANGE)):\n",
| 451 | + " id = START + indices[index].item()\n",
| 452 | + " if (print_Name):\n",
| 453 | + " if(is_BC[index]>0):\n",
| 454 | + " print(must_start_with + name_B + vocab[id] + must_end_with)\n",
| 455 | + " else:\n",
| 456 | + " if restrictions == \"Prefix only\":\n",
| 457 | + " print(must_start_with + vocab[id] + '-' + name_B + must_end_with)\n",
| 458 | + " else:\n",
| 459 | + " print(must_start_with + vocab[id] + name_B + must_end_with)\n",
| 460 | + " if (print_ID):\n",
| 461 | + " print(f'ID = {id}') # IDs\n",
| 462 | + " if (print_Similarity):\n",
| 463 | + " print(f'similiarity = {round(sorted[index].item()*100,2)} %')\n",
| 464 | + " if (print_Divider):\n",
| 465 | + " print('--------')\n",
| 466 | + "\n",
| 467 | + "\n",
| 468 | + "\n",
| 469 | + "\n",
| 470 | + "\n"
| 471 | + ],
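Taken together, the added cell encodes the reference image once with get_image_features and then, for every candidate vocab token, builds a candidate string, encodes it with get_text_features, and ranks the candidates by cosine similarity against the image encoding. A condensed, standalone sketch of that per-candidate comparison, using the same public checkpoint; the candidate string below is an arbitrary example, not taken from the notebook:

# Standalone sketch of the comparison done once per candidate pairing.
import torch
from torch import linalg as LA
import requests
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

# Encode the image once (A), as the cell does.
image = Image.open(requests.get(
    "http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
A = model.get_image_features(**processor(images=image, return_tensors="pt"))[0]
_A = LA.vector_norm(A, ord=2)

# Encode one candidate pairing (CB) and score it by cosine similarity.
ids_CB = processor.tokenizer(text="catyellow", padding=True, return_tensors="pt")
CB = model.get_text_features(**ids_CB)[0]
_CB = LA.vector_norm(CB, ord=2)

sim_CB = torch.dot(A, CB) / (_A * _CB)
print(f"similarity = {round(sim_CB.item() * 100, 2)} %")

The loop in the cell simply repeats the last four lines for each token id in the selected range and stores the scores in dots before sorting.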
| 472 |
+
"metadata": {
|
| 473 |
+
"collapsed": true,
|
| 474 |
+
"cellView": "form",
|
| 475 |
+
"id": "fi0jRruI0-tu",
|
| 476 |
+
"outputId": "6d7e8c39-a117-4b35-acfe-2a128c65aeb7",
|
| 477 |
+
"colab": {
|
| 478 |
+
"base_uri": "https://localhost:8080/"
|
| 479 |
+
}
|
| 480 |
+
},
|
| 481 |
+
"execution_count": 9,
|
| 482 |
+
"outputs": [
|
| 483 |
+
{
|
| 484 |
+
"output_type": "stream",
|
| 485 |
+
"name": "stdout",
|
| 486 |
+
"text": [
|
| 487 |
+
"//---//\n",
|
| 488 |
+
"\n",
|
| 489 |
+
"These token pairings within the range ID = 12500 to ID = 13000 most closely match the text_encoding for the prompt \"the image\" : \n",
|
| 490 |
+
"\n",
|
| 491 |
+
"sits</w>yellow\n",
|
| 492 |
+
"similiarity = 23.02 %\n",
|
| 493 |
+
"--------\n",
|
| 494 |
+
"neys</w>yellow\n",
|
| 495 |
+
"similiarity = 19.74 %\n",
|
| 496 |
+
"--------\n",
|
| 497 |
+
"cody</w>yellow\n",
|
| 498 |
+
"similiarity = 18.61 %\n",
|
| 499 |
+
"--------\n",
|
| 500 |
+
"wns</w>yellow\n",
|
| 501 |
+
"similiarity = 18.43 %\n",
|
| 502 |
+
"--------\n",
|
| 503 |
+
"java</w>yellow\n",
|
| 504 |
+
"similiarity = 18.15 %\n",
|
| 505 |
+
"--------\n",
|
| 506 |
+
"jj</w>yellow\n",
|
| 507 |
+
"similiarity = 18.03 %\n",
|
| 508 |
+
"--------\n",
|
| 509 |
+
"eno</w>yellow\n",
|
| 510 |
+
"similiarity = 17.87 %\n",
|
| 511 |
+
"--------\n",
|
| 512 |
+
"cled</w>yellow\n",
|
| 513 |
+
"similiarity = 17.85 %\n",
|
| 514 |
+
"--------\n",
|
| 515 |
+
"nom</w>yellow\n",
|
| 516 |
+
"similiarity = 17.75 %\n",
|
| 517 |
+
"--------\n",
|
| 518 |
+
"dads</w>yellow\n",
|
| 519 |
+
"similiarity = 17.5 %\n",
|
| 520 |
+
"--------\n",
|
| 521 |
+
"mil</w>yellow\n",
|
| 522 |
+
"similiarity = 17.47 %\n",
|
| 523 |
+
"--------\n",
|
| 524 |
+
"whom</w>yellow\n",
|
| 525 |
+
"similiarity = 17.37 %\n",
|
| 526 |
+
"--------\n",
|
| 527 |
+
"itv</w>yellow\n",
|
| 528 |
+
"similiarity = 17.34 %\n",
|
| 529 |
+
"--------\n",
|
| 530 |
+
"vibe</w>yellow\n",
|
| 531 |
+
"similiarity = 17.2 %\n",
|
| 532 |
+
"--------\n",
|
| 533 |
+
"noir</w>yellow\n",
|
| 534 |
+
"similiarity = 17.14 %\n",
|
| 535 |
+
"--------\n",
|
| 536 |
+
"yellowarel</w>\n",
|
| 537 |
+
"similiarity = 17.1 %\n",
|
| 538 |
+
"--------\n",
|
| 539 |
+
"#â̦</w>yellow\n",
|
| 540 |
+
"similiarity = 17.04 %\n",
|
| 541 |
+
"--------\n",
|
| 542 |
+
"maya</w>yellow\n",
|
| 543 |
+
"similiarity = 17.03 %\n",
|
| 544 |
+
"--------\n",
|
| 545 |
+
"yellowbam</w>\n",
|
| 546 |
+
"similiarity = 17.01 %\n",
|
| 547 |
+
"--------\n",
|
| 548 |
+
"erts</w>yellow\n",
|
| 549 |
+
"similiarity = 17.01 %\n",
|
| 550 |
+
"--------\n",
|
| 551 |
+
"xc</w>yellow\n",
|
| 552 |
+
"similiarity = 16.98 %\n",
|
| 553 |
+
"--------\n",
|
| 554 |
+
"mob</w>yellow\n",
|
| 555 |
+
"similiarity = 16.89 %\n",
|
| 556 |
+
"--------\n",
|
| 557 |
+
"dees</w>yellow\n",
|
| 558 |
+
"similiarity = 16.87 %\n",
|
| 559 |
+
"--------\n",
|
| 560 |
+
"icc</w>yellow\n",
|
| 561 |
+
"similiarity = 16.75 %\n",
|
| 562 |
+
"--------\n",
|
| 563 |
+
"aly</w>yellow\n",
|
| 564 |
+
"similiarity = 16.63 %\n",
|
| 565 |
+
"--------\n",
|
| 566 |
+
"lis</w>yellow\n",
|
| 567 |
+
"similiarity = 16.63 %\n",
|
| 568 |
+
"--------\n",
|
| 569 |
+
"yellowturf</w>\n",
|
| 570 |
+
"similiarity = 16.62 %\n",
|
| 571 |
+
"--------\n",
|
| 572 |
+
"yellowbaba</w>\n",
|
| 573 |
+
"similiarity = 16.58 %\n",
|
| 574 |
+
"--------\n",
|
| 575 |
+
":*</w>yellow\n",
|
| 576 |
+
"similiarity = 16.42 %\n",
|
| 577 |
+
"--------\n",
|
| 578 |
+
"inho</w>yellow\n",
|
| 579 |
+
"similiarity = 16.39 %\n",
|
| 580 |
+
"--------\n",
|
| 581 |
+
"yellowhes</w>\n",
|
| 582 |
+
"similiarity = 16.37 %\n",
|
| 583 |
+
"--------\n",
|
| 584 |
+
"nity</w>yellow\n",
|
| 585 |
+
"similiarity = 16.3 %\n",
|
| 586 |
+
"--------\n",
|
| 587 |
+
"lust</w>yellow\n",
|
| 588 |
+
"similiarity = 16.3 %\n",
|
| 589 |
+
"--------\n",
|
| 590 |
+
"ikh</w>yellow\n",
|
| 591 |
+
"similiarity = 16.26 %\n",
|
| 592 |
+
"--------\n",
|
| 593 |
+
"nyt</w>yellow\n",
|
| 594 |
+
"similiarity = 16.24 %\n",
|
| 595 |
+
"--------\n",
|
| 596 |
+
"(+</w>yellow\n",
|
| 597 |
+
"similiarity = 16.11 %\n",
|
| 598 |
+
"--------\n",
|
| 599 |
+
"foto</w>yellow\n",
|
| 600 |
+
"similiarity = 16.11 %\n",
|
| 601 |
+
"--------\n",
|
| 602 |
+
"stl</w>yellow\n",
|
| 603 |
+
"similiarity = 16.06 %\n",
|
| 604 |
+
"--------\n",
|
| 605 |
+
"mick</w>yellow\n",
|
| 606 |
+
"similiarity = 16.06 %\n",
|
| 607 |
+
"--------\n",
|
| 608 |
+
"...@</w>yellow\n",
|
| 609 |
+
"similiarity = 16.05 %\n",
|
| 610 |
+
"--------\n",
|
| 611 |
+
"ugh</w>yellow\n",
|
| 612 |
+
"similiarity = 16.05 %\n",
|
| 613 |
+
"--------\n",
|
| 614 |
+
"gro</w>yellow\n",
|
| 615 |
+
"similiarity = 16.01 %\n",
|
| 616 |
+
"--------\n",
|
| 617 |
+
"wski</w>yellow\n",
|
| 618 |
+
"similiarity = 16.01 %\n",
|
| 619 |
+
"--------\n",
|
| 620 |
+
"ðŁĴ«</w>yellow\n",
|
| 621 |
+
"similiarity = 15.74 %\n",
|
| 622 |
+
"--------\n",
|
| 623 |
+
"deen</w>yellow\n",
|
| 624 |
+
"similiarity = 15.73 %\n",
|
| 625 |
+
"--------\n",
|
| 626 |
+
"assy</w>yellow\n",
|
| 627 |
+
"similiarity = 15.72 %\n",
|
| 628 |
+
"--------\n",
|
| 629 |
+
"mtv</w>yellow\n",
|
| 630 |
+
"similiarity = 15.72 %\n",
|
| 631 |
+
"--------\n",
|
| 632 |
+
"yellowðŁĺ»</w>\n",
|
| 633 |
+
"similiarity = 15.72 %\n",
|
| 634 |
+
"--------\n",
|
| 635 |
+
"yellowfrm</w>\n",
|
| 636 |
+
"similiarity = 15.65 %\n",
|
| 637 |
+
"--------\n",
|
| 638 |
+
"moss</w>yellow\n",
|
| 639 |
+
"similiarity = 15.64 %\n",
|
| 640 |
+
"--------\n",
|
| 641 |
+
"bart</w>yellow\n",
|
| 642 |
+
"similiarity = 15.61 %\n",
|
| 643 |
+
"--------\n",
|
| 644 |
+
"tw</w>yellow\n",
|
| 645 |
+
"similiarity = 15.51 %\n",
|
| 646 |
+
"--------\n",
|
| 647 |
+
"yellowplug</w>\n",
|
| 648 |
+
"similiarity = 15.46 %\n",
|
| 649 |
+
"--------\n",
|
| 650 |
+
"jen</w>yellow\n",
|
| 651 |
+
"similiarity = 15.45 %\n",
|
| 652 |
+
"--------\n",
|
| 653 |
+
"pst</w>yellow\n",
|
| 654 |
+
"similiarity = 15.43 %\n",
|
| 655 |
+
"--------\n",
|
| 656 |
+
"omfg</w>yellow\n",
|
| 657 |
+
"similiarity = 15.43 %\n",
|
| 658 |
+
"--------\n",
|
| 659 |
+
"dine</w>yellow\n",
|
| 660 |
+
"similiarity = 15.38 %\n",
|
| 661 |
+
"--------\n",
|
| 662 |
+
"vern</w>yellow\n",
|
| 663 |
+
"similiarity = 15.33 %\n",
|
| 664 |
+
"--------\n",
|
| 665 |
+
"reno</w>yellow\n",
|
| 666 |
+
"similiarity = 15.25 %\n",
|
| 667 |
+
"--------\n",
|
| 668 |
+
"yellow´</w>\n",
|
| 669 |
+
"similiarity = 15.14 %\n",
|
| 670 |
+
"--------\n",
|
| 671 |
+
"omic</w>yellow\n",
|
| 672 |
+
"similiarity = 15.14 %\n",
|
| 673 |
+
"--------\n",
|
| 674 |
+
"łï¸ı</w>yellow\n",
|
| 675 |
+
"similiarity = 15.11 %\n",
|
| 676 |
+
"--------\n",
|
| 677 |
+
"yellowgis</w>\n",
|
| 678 |
+
"similiarity = 15.06 %\n",
|
| 679 |
+
"--------\n",
|
| 680 |
+
"aunt</w>yellow\n",
|
| 681 |
+
"similiarity = 15.0 %\n",
|
| 682 |
+
"--------\n",
|
| 683 |
+
"joan</w>yellow\n",
|
| 684 |
+
"similiarity = 14.96 %\n",
|
| 685 |
+
"--------\n",
|
| 686 |
+
"anas</w>yellow\n",
|
| 687 |
+
"similiarity = 14.92 %\n",
|
| 688 |
+
"--------\n",
|
| 689 |
+
"ðŁĴĵ</w>yellow\n",
|
| 690 |
+
"similiarity = 14.9 %\n",
|
| 691 |
+
"--------\n",
|
| 692 |
+
"chad</w>yellow\n",
|
| 693 |
+
"similiarity = 14.89 %\n",
|
| 694 |
+
"--------\n",
|
| 695 |
+
"yellowsake</w>\n",
|
| 696 |
+
"similiarity = 14.88 %\n",
|
| 697 |
+
"--------\n",
|
| 698 |
+
"gues</w>yellow\n",
|
| 699 |
+
"similiarity = 14.84 %\n",
|
| 700 |
+
"--------\n",
|
| 701 |
+
"gian</w>yellow\n",
|
| 702 |
+
"similiarity = 14.84 %\n",
|
| 703 |
+
"--------\n",
|
| 704 |
+
"asi</w>yellow\n",
|
| 705 |
+
"similiarity = 14.83 %\n",
|
| 706 |
+
"--------\n",
|
| 707 |
+
"yellowoven</w>\n",
|
| 708 |
+
"similiarity = 14.82 %\n",
|
| 709 |
+
"--------\n",
|
| 710 |
+
"jury</w>yellow\n",
|
| 711 |
+
"similiarity = 14.79 %\n",
|
| 712 |
+
"--------\n",
|
| 713 |
+
"blvd</w>yellow\n",
|
| 714 |
+
"similiarity = 14.75 %\n",
|
| 715 |
+
"--------\n",
|
| 716 |
+
"omez</w>yellow\n",
|
| 717 |
+
"similiarity = 14.72 %\n",
|
| 718 |
+
"--------\n",
|
| 719 |
+
"yellowyang</w>\n",
|
| 720 |
+
"similiarity = 14.7 %\n",
|
| 721 |
+
"--------\n",
|
| 722 |
+
"gu</w>yellow\n",
|
| 723 |
+
"similiarity = 14.48 %\n",
|
| 724 |
+
"--------\n",
|
| 725 |
+
"yellowova</w>\n",
|
| 726 |
+
"similiarity = 14.45 %\n",
|
| 727 |
+
"--------\n",
|
| 728 |
+
"yellowinez</w>\n",
|
| 729 |
+
"similiarity = 14.44 %\n",
|
| 730 |
+
"--------\n",
|
| 731 |
+
"pei</w>yellow\n",
|
| 732 |
+
"similiarity = 14.44 %\n",
|
| 733 |
+
"--------\n",
|
| 734 |
+
"ãĢIJ</w>yellow\n",
|
| 735 |
+
"similiarity = 14.43 %\n",
|
| 736 |
+
"--------\n",
|
| 737 |
+
"ãĢij</w>yellow\n",
|
| 738 |
+
"similiarity = 14.43 %\n",
|
| 739 |
+
"--------\n",
|
| 740 |
+
"ðŁĮŀ</w>yellow\n",
|
| 741 |
+
"similiarity = 14.36 %\n",
|
| 742 |
+
"--------\n",
|
| 743 |
+
"ðŁĺĿ</w>yellow\n",
|
| 744 |
+
"similiarity = 14.27 %\n",
|
| 745 |
+
"--------\n",
|
| 746 |
+
"troy</w>yellow\n",
|
| 747 |
+
"similiarity = 14.16 %\n",
|
| 748 |
+
"--------\n",
|
| 749 |
+
"pale</w>yellow\n",
|
| 750 |
+
"similiarity = 14.14 %\n",
|
| 751 |
+
"--------\n",
|
| 752 |
+
"boi</w>yellow\n",
|
| 753 |
+
"similiarity = 14.11 %\n",
|
| 754 |
+
"--------\n",
|
| 755 |
+
"nn</w>yellow\n",
|
| 756 |
+
"similiarity = 14.08 %\n",
|
| 757 |
+
"--------\n",
|
| 758 |
+
"âı°</w>yellow\n",
|
| 759 |
+
"similiarity = 14.01 %\n",
|
| 760 |
+
"--------\n",
|
| 761 |
+
"ooth</w>yellow\n",
|
| 762 |
+
"similiarity = 13.93 %\n",
|
| 763 |
+
"--------\n",
|
| 764 |
+
"pied</w>yellow\n",
|
| 765 |
+
"similiarity = 13.9 %\n",
|
| 766 |
+
"--------\n",
|
| 767 |
+
"bola</w>yellow\n",
|
| 768 |
+
"similiarity = 13.79 %\n",
|
| 769 |
+
"--------\n",
|
| 770 |
+
"âŀ¡</w>yellow\n",
|
| 771 |
+
"similiarity = 13.77 %\n",
|
| 772 |
+
"--------\n",
|
| 773 |
+
"rena</w>yellow\n",
|
| 774 |
+
"similiarity = 13.75 %\n",
|
| 775 |
+
"--------\n",
|
| 776 |
+
"dley</w>yellow\n",
|
| 777 |
+
"similiarity = 13.73 %\n",
|
| 778 |
+
"--------\n",
|
| 779 |
+
"evan</w>yellow\n",
|
| 780 |
+
"similiarity = 13.67 %\n",
|
| 781 |
+
"--------\n",
|
| 782 |
+
"pony</w>yellow\n",
|
| 783 |
+
"similiarity = 13.63 %\n",
|
| 784 |
+
"--------\n",
|
| 785 |
+
"rene</w>yellow\n",
|
| 786 |
+
"similiarity = 13.62 %\n",
|
| 787 |
+
"--------\n",
|
| 788 |
+
"mock</w>yellow\n",
|
| 789 |
+
"similiarity = 13.57 %\n",
|
| 790 |
+
"--------\n"
|
| 791 |
+
]
|
| 792 |
+
}
|
| 793 |
+
]
|
| 794 |
+
},
|
| 795 | + {
| 796 | + "cell_type": "code",
| 797 | + "source": [
| 798 | + "# @title 🪐📝 Prompt to prompt : Add single token to existing prompt to match another prompt\n",
| 799 | + "# @markdown Write a text to match against...\n",
| 800 |   "prompt_A = \"photo of a banana\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
| 801 | + "\n",
| 802 |   "# @markdown Set conditions for the output\n",
| 803 | + "must_start_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
| 804 |   "must_contain = \"yellow\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
| 805 | + "must_end_with = \"\" # @param {\"type\":\"string\",\"placeholder\":\"write a text\"}\n",
| 806 |   "token_B = must_contain\n",
| 807 |   "\n",
| 808 |   "# @markdown Limit the search\n",
| 815 |   "min_char_size = 3 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
| 816 |   "char_range = 5 # @param {type:\"slider\", min:0, max: 50, step:1}\n",
| 817 |   "\n",
| 818 |   "#Tokenize input B\n",
| 819 |   "from transformers import AutoTokenizer\n",
| 820 |   "tokenizer = AutoTokenizer.from_pretrained(\"openai/clip-vit-large-patch14\", clean_up_tokenization_spaces = False)\n",
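The tokenizer constructed here is what turns must_contain into CLIP token ids a few lines further down (tokenizer(text = token_B)['input_ids']). A small sketch of what that call returns; the middle id is printed rather than asserted, since only the start/end marker ids (49406 and 49407) are fixed:

# Sketch: inspect how the CLIP tokenizer encodes the search word.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14",
                                          clean_up_tokenization_spaces=False)
ids = tokenizer(text="yellow")["input_ids"]
print(ids)                                   # [49406, <id of 'yellow</w>'>, 49407]
print(tokenizer.convert_ids_to_tokens(ids))  # likely ['<|startoftext|>', 'yellow</w>', '<|endoftext|>']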
| 898 |   " dots[index] = result\n",
| 899 |   "#----#\n",
| 900 |   "\n",
| 901 |   "sorted, indices = torch.sort(dots,dim=0 , descending=True)\n",
| 902 |   "\n",
| 903 |   "# @markdown Print options\n",
|
|
|
|
| 933 |   " print('--------')"
| 934 |   ],
| 935 |   "metadata": {
| 936 | + "cellView": "form",
| 937 |   "id": "uDtcm-l8UCJk"
| 938 |   },
| 939 |   "execution_count": null,
|
|
|
|
| 1371 |   "\n",
| 1372 |   "There might be some updates in the future with features not mentioned here.\n",
| 1373 |   "\n",
| 1374 | + "//---//\n",
| 1375 | + "\n",
| 1376 | + "https://codeandlife.com/2023/01/26/mastering-the-huggingface-clip-model-how-to-extract-embeddings-and-calculate-similarity-for-text-and-images/"
| 1377 |   ],
| 1378 |   "metadata": {
| 1379 |   "id": "njeJx_nSSA8H"
|
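The linked article walks through extracting CLIP embeddings and computing similarity, which is what the cells above do with get_image_features and get_text_features. A minimal text-to-text version of the same idea, matching the "Prompt to prompt" cell; the two prompts are arbitrary examples:

import torch
from transformers import CLIPProcessor, CLIPModel

processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

def encode(text: str) -> torch.Tensor:
    # CLIP text embedding for a single prompt (1-D tensor, 768 values for this checkpoint).
    ids = processor.tokenizer(text=text, padding=True, return_tensors="pt")
    return model.get_text_features(**ids)[0]

a = encode("photo of a banana")
b = encode("photo of a yellow banana")
sim = torch.nn.functional.cosine_similarity(a, b, dim=0)
print(f"similarity = {round(sim.item() * 100, 2)} %")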