|
import math
import os
import random
import shutil
from collections import Counter
from pathlib import Path

import accelerate
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from pynvml import *
from transformers import AutoTokenizer, PretrainedConfig
|
|
|
# Batch of 6 tokenized captions, each padded with 0 to a fixed length of 77.
# 49406 / 49407 are presumably the tokenizer's BOS / EOS ids and 267 the
# comma separator (matches how they are filtered out below) — TODO confirm
# against the tokenizer's special-token map.
_SEQ_LEN = 77
_TOKEN_ROWS = [
    [49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
     32955, 267, 38692, 267, 13989, 43204, 267, 1042, 13989, 49407],
    [49406, 1884, 33667, 267, 41122, 3633, 267, 21263, 268, 1126,
     268, 7771, 267, 6148, 267, 32955, 267, 13989, 43204, 267,
     1042, 13989, 267, 1579, 3396, 267, 2442, 1579, 3396, 49407],
    [49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
     3143, 267, 6307, 267, 1070, 1042, 13989, 49407],
    [49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
     46131, 267, 3143, 267, 6307, 49407],
    [49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
     6148, 267, 32955, 267, 38692, 267, 13989, 43204, 267, 1042,
     13989, 267, 1579, 3396, 267, 5094, 268, 789, 1579, 3396,
     49407],
    [49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
     32955, 267, 38692, 6448, 49407],
]
tensor1 = torch.tensor(
    [row + [0] * (_SEQ_LEN - len(row)) for row in _TOKEN_ROWS]
)
|
|
|
|
|
# Count how often each "content" token id appears in the batch, excluding
# padding (0), the presumed BOS/EOS ids (49406/49407) and the comma id (267).
#
# Fixes vs. the original:
#  - `dict` shadowed the builtin; renamed to `token_counts`.
#  - manual get/+1 counting replaced by collections.Counter.
#  - the chained `!=` comparisons replaced by a set membership test.
SPECIAL_TOKEN_IDS = frozenset({0, 49406, 49407, 267})


def remove_item(n):
    """Return True when token id *n* is a content token (not special/padding)."""
    return n not in SPECIAL_TOKEN_IDS


# Flatten the (6, 77) batch to one list of ids, drop special ids, count.
flat_ids = tensor1.flatten().tolist()
token_counts = dict(Counter(t for t in flat_ids if remove_item(t)))
print(token_counts)
|
|
|
# Load the slow (non-Rust) tokenizer from the local checkpoint's
# "tokenizer" subfolder. `revision` is kept as an explicit variable so a
# pinned revision can be supplied later without touching the call itself.
MODEL_DIR = "/export/home/daifang/Diffusion/diffusers/model/sd-8_28"
revision = None

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_DIR,
    subfolder="tokenizer",
    revision=revision,
    use_fast=False,
)
|
|
|
|
|
|
|
# BUG FIX: the caption list was a bare expression statement, so `captions`
# was never defined and the tokenizer call below raised NameError. Assign
# the list to `captions` before tokenizing.
captions = [
    'papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo',
    'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo, white points, sand-like white points',
    'papillary, wider-than-tall, unclear, irregular, echo uneven, low echo, white points, sand-like white points',
    'papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo',
    'papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo, white points, large white points',
    'No focus',
]

# Tokenize the batch to fixed-length, padded/truncated PyTorch tensors.
inputs = tokenizer(
    captions,
    max_length=tokenizer.model_max_length,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)
print(inputs)
|
|