the output dimension #1
opened by hellopbc
from transformers import AutoTokenizer, AutoModelForMaskedLM

model_name = 'pre-model/' + 'electra/' + 'humandna_ELECTRA_1epoch'  # local path (not used below)
tokenizer = AutoTokenizer.from_pretrained("simecek/humandna_ELECTRA_1epoch")
model = AutoModelForMaskedLM.from_pretrained("simecek/humandna_ELECTRA_1epoch")

x = ["ATGCAT GACTGT ACGTAA", "ATGCAT GACTGT GATTAG", "ATTCAT GACTGT TGAAGA"]
encoded_inputs = tokenizer(x, return_tensors='pt')

# Decode each sequence back to tokens to check the tokenization.
for item in encoded_inputs['input_ids']:
    decoded = tokenizer.decode(item)
    print(decoded)

# The masked-LM model returns a MaskedLMOutput; its logits hold the per-token scores.
X_enpr_tensor = model(**encoded_inputs).logits
print("X_enpr_tensor:", X_enpr_tensor.shape)  # torch.Size([3, 5, 4041])
Here 4041 is the vocab_size, and the hidden_size is 256, so I would expect the output dimension to be (3, 5, 256).
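If what is wanted is the (3, 5, 256) tensor of per-token hidden states, that comes from the encoder itself rather than from the masked-LM head, which always projects to vocab_size. A minimal sketch, assuming the same checkpoint and the encoded_inputs built above, using the bare AutoModel:

from transformers import AutoModel

# Bare ELECTRA encoder without the MLM head: its output stays at hidden_size.
encoder = AutoModel.from_pretrained("simecek/humandna_ELECTRA_1epoch")
hidden = encoder(**encoded_inputs).last_hidden_state
print(hidden.shape)  # expected torch.Size([3, 5, 256]), i.e. (batch, seq_len, hidden_size)

Alternatively, keeping AutoModelForMaskedLM and calling it with output_hidden_states=True exposes the same per-layer hidden states via outputs.hidden_states.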