# Quick check of the token-ID frequencies in a batch of tokenized captions and
# of the tokenizer output for the corresponding caption strings.
import torch
from transformers import AutoTokenizer
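# Example batch of token IDs, presumably produced by the Stable Diffusion CLIP
# tokenizer loaded below: 49406 is the start-of-text token, 49407 the
# end-of-text token, 0 is padding, and 267 appears to be the comma that
# separates the caption attributes.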
tensor1 = torch.tensor([[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
32955, 267, 38692, 267, 13989, 43204, 267, 1042, 13989, 49407,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 41122, 3633, 267, 21263, 268, 1126,
268, 7771, 267, 6148, 267, 32955, 267, 13989, 43204, 267,
1042, 13989, 267, 1579, 3396, 267, 2442, 1579, 3396, 49407,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
3143, 267, 6307, 267, 1070, 1042, 13989, 49407, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
46131, 267, 3143, 267, 6307, 49407, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
6148, 267, 32955, 267, 38692, 267, 13989, 43204, 267, 1042,
13989, 267, 1579, 3396, 267, 5094, 268, 789, 1579, 3396,
49407, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0],
[49406, 1884, 33667, 267, 21263, 268, 1126, 268, 7771, 267,
32955, 267, 38692, 6448, 49407, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0]])
# Flatten the batch into a single list of token IDs.
nested_ids = tensor1.tolist()
all_ids = sum(nested_ids, [])

# Keep only content tokens: drop padding (0), the start/end tokens
# (49406/49407) and token 267 (likely the comma separator).
def keep_token(n):
    return n not in (0, 49406, 49407, 267)

kept_ids = list(filter(keep_token, all_ids))

# Count how often each remaining token ID occurs.
counts = {}
for key in kept_ids:
    counts[key] = counts.get(key, 0) + 1
print(counts)
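# Equivalent check using the standard library's collections.Counter, as an
# optional alternative to the manual counting loop above.
from collections import Counter
assert Counter(kept_ids) == counts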
revision = None
tokenizer = AutoTokenizer.from_pretrained(
"/export/home/daifang/Diffusion/diffusers/model/sd-8_28",
subfolder="tokenizer",
revision=revision,
use_fast=False,
)
# captions = ['papillary blood flow', 'malignant follicular, solid, unclear, irregular, hales, circular, enormous, white point', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo, white points, sand-like white points', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, extremely low echo, white points, sand-like white points', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo, white points, sand-like white points']
captions = ['papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo', 'papillary, wider-than-tall, solid, unclear, irregular, echo uneven, low echo, white points, sand-like white points', 'papillary, wider-than-tall, unclear, irregular, echo uneven, low echo, white points, sand-like white points', 'papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo', 'papillary, taller-than-wide, solid, unclear, irregular, echo uneven, low echo, white points, large white points', 'No focus']
inputs = tokenizer(
captions, max_length=tokenizer.model_max_length, padding="max_length", truncation=True, return_tensors="pt")
print(inputs)
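# Optional sanity check (a minimal sketch): decode the encoded captions back to
# text with the tokenizer's standard batch_decode API to confirm the round trip.
decoded = tokenizer.batch_decode(inputs.input_ids, skip_special_tokens=True)
for caption, text in zip(captions, decoded):
    print(caption, "->", text)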