Upload sd_token_similarity_calculator.ipynb
sd_token_similarity_calculator.ipynb CHANGED
@@ -25,15 +25,6 @@
    "id": "L7JTcbOdBPfh"
   }
  },
- {
-  "cell_type": "code",
-  "source": [],
-  "metadata": {
-   "id": "PBwVIuAjEdHA"
-  },
-  "execution_count": null,
-  "outputs": []
- },
  {
   "cell_type": "code",
   "source": [
@@ -228,6 +219,42 @@
   "execution_count": null,
   "outputs": []
  },
+ {
+  "cell_type": "code",
+  "source": [
+   "# @title 💫 Compare Text encodings\n",
+   "\n",
+   "prompt_A = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
+   "prompt_B = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
+   "use_token_padding = True # @param {type:\"boolean\"}\n",
+   "\n",
+   "from transformers import CLIPProcessor, CLIPModel\n",
+   "\n",
+   "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
+   "\n",
+   "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
+   "\n",
+   "ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors=\"pt\")\n",
+   "text_encoding_A = model.get_text_features(**ids_A)\n",
+   "\n",
+   "ids_B = processor.tokenizer(text=prompt_B, padding=use_token_padding, return_tensors=\"pt\")\n",
+   "text_encoding_B = model.get_text_features(**ids_B)\n",
+   "\n",
+   "similarity_str = 'The similarity between the text_encoding for A:\"' + prompt_A + '\" and B: \"' + prompt_B +'\" is ' + token_similarity(text_encoding_A[0] , text_encoding_B[0])\n",
+   "\n",
+   "\n",
+   "print(similarity_str)\n",
+   "#outputs = model(**inputs)\n",
+   "#logits_per_image = outputs.logits_per_image # this is the image-text similarity score\n",
+   "#probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities"
+  ],
+  "metadata": {
+   "id": "QQOjh5BvnG8M",
+   "collapsed": true
+  },
+  "execution_count": null,
+  "outputs": []
+ },
  {
   "cell_type": "markdown",
   "source": [
@@ -256,7 +283,8 @@
   "#You can leave the 'prompt' field empty to get a random value tensor. Since the tensor is random value, it will not correspond to any tensor in the vocab.json list , and this it will have no ID."
  ],
  "metadata": {
-  "id": "RPdkYzT2_X85"
+  "id": "RPdkYzT2_X85",
+  "cellView": "form"
  },
  "execution_count": null,
  "outputs": []
@@ -284,7 +312,8 @@
   "_P = LA.vector_norm(A, ord=2)\n"
  ],
  "metadata": {
-  "id": "YqdiF8DIz9Wu"
+  "id": "YqdiF8DIz9Wu",
+  "cellView": "form"
  },
  "execution_count": null,
  "outputs": []
@@ -340,7 +369,8 @@
  ],
  "metadata": {
   "id": "oXbNSRSKPgRr",
-  "collapsed": true
+  "collapsed": true,
+  "cellView": "form"
  },
  "execution_count": null,
  "outputs": []
@@ -382,7 +412,8 @@
  ],
  "metadata": {
   "id": "juxsvco9B0iV",
-  "collapsed": true
+  "collapsed": true,
+  "cellView": "form"
  },
  "execution_count": null,
  "outputs": []
@@ -419,7 +450,8 @@
  ],
  "metadata": {
   "id": "YIEmLAzbHeuo",
-  "collapsed": true
+  "collapsed": true,
+  "cellView": "form"
  },
  "execution_count": null,
  "outputs": []
@@ -440,47 +472,8 @@
  ],
  "metadata": {
   "id": "MwmOdC9cNZty",
-  "collapsed": true
- },
- "execution_count": null,
- "outputs": []
- },
- {
-  "cell_type": "code",
-  "source": [
-   "# @title 💫 Compare Text encodings\n",
-   "\n",
-   "prompt_A = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
-   "prompt_B = \"\" # @param {\"type\":\"string\",\"placeholder\":\"Write a prompt\"}\n",
-   "use_token_padding = True # @param {type:\"boolean\"}\n",
-   "\n",
-   "from transformers import CLIPProcessor, CLIPModel\n",
-   "\n",
-   "\n",
-   "processor = CLIPProcessor.from_pretrained(\"openai/clip-vit-large-patch14\" , clean_up_tokenization_spaces = True)\n",
-   "\n",
-   "model = CLIPModel.from_pretrained(\"openai/clip-vit-large-patch14\")\n",
-   "\n",
-   "ids_A = processor.tokenizer(text=prompt_A, padding=use_token_padding, return_tensors=\"pt\")\n",
-   "text_encoding_A = model.get_text_features(**ids_A)\n",
-   "\n",
-   "ids_B = processor.tokenizer(text=prompt_B, padding=use_token_padding, return_tensors=\"pt\")\n",
-   "text_encoding_B = model.get_text_features(**ids_B)\n",
-   "\n",
-   "similarity_str = 'The similarity between the text_encoding for A:\"' + prompt_A + '\" and B: \"' + prompt_B +'\" is ' + token_similarity(text_encoding_A[0] , text_encoding_B[0])\n",
-   "\n",
-   "\n",
-   "print(similarity_str)\n",
-   "#outputs = model(**inputs)\n",
-   "#logits_per_image = outputs.logits_per_image # this is the image-text similarity score\n",
-   "#probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities\n",
-   "\n",
-   "\n",
-   "\n"
-  ],
-  "metadata": {
-   "id": "QQOjh5BvnG8M",
-   "collapsed": true
+  "collapsed": true,
+  "cellView": "form"
  },
  "execution_count": null,
  "outputs": []