"""Generate Russian text with the multilingual mGPT model.

Feeds a Russian prompt to sberbank-ai/mGPT and prints roughly 100 tokens
of sampled continuation.
"""
from transformers import GPT2LMHeadModel, GPT2Tokenizer


def main() -> None:
    """Load mGPT, sample a continuation of the prompt, and print it."""
    # Downloads (or loads from the local cache) the pretrained vocab and weights.
    tokenizer = GPT2Tokenizer.from_pretrained("sberbank-ai/mGPT")
    model = GPT2LMHeadModel.from_pretrained("sberbank-ai/mGPT")

    # Prompt: "Alexander Sergeyevich Pushkin was born in "
    text = "Александр Сергеевич Пушкин родился в "
    input_ids = tokenizer.encode(text, return_tensors="pt")

    out = model.generate(
        input_ids,
        # Fix: without do_sample=True, generate() decodes greedily and
        # silently ignores top_k/top_p (newer transformers versions warn).
        do_sample=True,
        min_length=100,
        max_length=100,
        eos_token_id=5,  # model-specific special-token id — TODO confirm against mGPT's generation config
        pad_token_id=1,  # model-specific special-token id — TODO confirm against mGPT's generation config
        top_k=10,  # sample only from the 10 most likely next tokens
        # NOTE(review): top_p=0.0 keeps just the single best token, which makes
        # sampling effectively greedy despite top_k=10 — confirm this is intended.
        top_p=0.0,
        no_repeat_ngram_size=5,  # forbid repeating any 5-gram within the output
    )

    # out is a (num_return_sequences, seq_len) tensor of token ids; we generate
    # a single sequence, so decode only row 0 instead of mapping over all rows.
    generated_text = tokenizer.decode(out[0])
    print(generated_text)


if __name__ == "__main__":
    main()