import torch
from transformers import AutoTokenizer
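# tensor1 is a batch of six caption encodings produced by the CLIP tokenizer:
# 49406 = <|startoftext|>, 49407 = <|endoftext|>, 0 = padding, and 267 is the
# comma separating the caption attributes (see the filter below).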
tensor1 = torch.tensor([[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
32955, 267, 38692, 267, 13989, 43204, 267, 1042, 13989, 49407,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 41122, 3633, 267, 21263, 268, 1126,
268, 7771, 267, 6148, 267, 32955, 267, 13989, 43204, 267,
1042, 13989, 267, 1579, 3396, 267, 2442, 1579, 3396, 49407,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
3143, 267, 6307, 267, 1070, 1042, 13989, 49407, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
46131, 267, 3143, 267, 6307, 49407, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
6148, 267, 32955, 267, 38692, 267, 13989, 43204, 267, 1042,
13989, 267, 1579, 3396, 267, 5094, 268, 789, 1579, 3396,
49407, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
32955, 267, 38692, 6448, 49407, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0]])
# Flatten the batch into a single list of token IDs.
flat_ids = sum(tensor1.tolist(), [])

def keep_token(n):
    # Keep only content tokens: drop padding (0), EOS (49407),
    # BOS (49406), and the comma separator (267).
    return n != 0 and n != 49407 and n != 49406 and n != 267

filtered_ids = list(filter(keep_token, flat_ids))

# Count how often each remaining token ID occurs across the batch.
counts = {}
for key in filtered_ids:
    counts[key] = counts.get(key, 0) + 1
print(counts)
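# The same frequency count can be done with the standard library; this is an
# equivalent sketch (assuming filtered_ids as built above), kept here only as
# a cross-check of the manual loop.
from collections import Counter
assert Counter(filtered_ids) == counts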
# Load the CLIP tokenizer bundled with the local Stable Diffusion checkpoint.
revision = None
tokenizer = AutoTokenizer.from_pretrained(
    "/export/home/daifang/Diffusion/diffusers/model/sd-8_28",
    subfolder="tokenizer",
    revision=revision,
    use_fast=False,
)
# captions = ['papillary blood flow', 'malignant follicular, solid, unclear, irregular, hales, circular, enormous, white point', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo, white points, sand-like white points', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, extremely low echo, white points, sand-like white points', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo, white points, sand-like white points']
captions = ['papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo, white points, sand-like white points', 'papillary, wider-than-tall, unclear, irregular, echo uneven, low echo, white points, sand-like white points', 'papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo', 'papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo, white points, large white points', 'No focus']
# Tokenize the captions the same way the training script does: pad and
# truncate every caption to the model's maximum sequence length.
inputs = tokenizer(
    captions,
    max_length=tokenizer.model_max_length,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)
print(inputs)
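# Sanity check (a sketch, assuming the checkpoint above is the one that
# produced tensor1): the freshly tokenized IDs should reproduce the
# hard-coded batch, and decoding should recover the captions without
# special tokens.
print(torch.equal(inputs.input_ids, tensor1))
print(tokenizer.batch_decode(inputs.input_ids, skip_special_tokens=True))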