|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path |
|
import json |
|
import tempfile |
|
|
|
from transformers import T5Tokenizer, T5TokenizerFast, T5Config, T5ForConditionalGeneration |
|
from transformers.models.t5.tokenization_t5 import VOCAB_FILES_NAMES |
|
|
|
# Source checkpoint whose tokenizer/config we copy, and the name of the
# tiny-model output directory this script generates.
mname_from = "patrickvonplaten/t5-tiny-random"

mname_very_small = "t5-very-small-random"

# Load the slow tokenizer, the base config, and the fast tokenizer from
# the source repo; all three are re-saved under the new name below.
tokenizer = T5Tokenizer.from_pretrained(mname_from)

config = T5Config.from_pretrained(mname_from)

tokenizer_fast = T5TokenizerFast.from_pretrained(mname_from)
|
|
|
# Shrink every architectural dimension so the checkpoint stays tiny and
# fast to load, while keeping the full T5 vocabulary size.
tiny_arch = {
    "vocab_size": 32128,
    "d_model": 64,
    "d_ff": 256,
    "d_kv": 8,
    "num_layers": 8,
    "num_decoder_layers": 8,
    "num_heads": 4,
    "relative_attention_num_buckets": 32,
}
config.update(tiny_arch)

# Fresh randomly-initialized model built from the shrunken config.
very_small_model = T5ForConditionalGeneration(config)
print(f"num of params {very_small_model.num_parameters()}")
|
|
|
|
|
# Smoke-test the tiny model with one forward pass on a toy seq2seq batch.
src_texts = ["A long paragraph for summarization.", "Another paragraph for summarization."]
tgt_texts = ["Summary of the text.", "Another summary."]

# `prepare_seq2seq_batch` is deprecated (removed in transformers v5).
# Calling the tokenizer directly with `text_target` yields the same
# input_ids / attention_mask / labels dict; padding="longest" matches
# the old method's default so the two texts batch into one tensor.
batch = tokenizer(src_texts, text_target=tgt_texts, padding="longest", return_tensors="pt")
outputs = very_small_model(**batch)

# logits[0] has one row per target token — printing its length just
# confirms the forward pass ran end to end.
print("test output:", len(outputs.logits[0]))
|
|
|
|
|
# Cast weights to fp16 to halve the checkpoint size on disk.
very_small_model.half()

# Persist weights, config, and both tokenizer variants under the output
# directory so it is loadable via `from_pretrained(mname_very_small)`.
very_small_model.save_pretrained(mname_very_small)

# NOTE(review): likely redundant — model.save_pretrained above already
# writes the config; confirm before removing.
config.save_pretrained(mname_very_small)

tokenizer.save_pretrained(mname_very_small)

tokenizer_fast.save_pretrained(mname_very_small)

print(f"Generated {mname_very_small}")
|
|
|
|
|
|
|
|
|
|