import os

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

current_path = os.path.dirname(os.path.abspath(__file__))
tokenizer_path = os.path.join(current_path, "gpt_tokenizer")
model_path = os.path.join(current_path, "gpt2_3epoch")

tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_path)  # also try gpt2-medium
model = GPT2LMHeadModel.from_pretrained(model_path)

# NOTE: the two extra tokens below appear to have been stripped from the original
# source (likely angle-bracketed markers); they are left as-is here.
extra_tokens = ["", ""]
tokenizer.add_tokens(extra_tokens)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))


def subject_gen_func(email):
    """Generate a subject line for the given email body."""
    device = "cpu"
    model.to(device)
    model.eval()

    input_ids = tokenizer.encode(email, return_tensors="pt").to(device)
    attention_mask = torch.ones_like(input_ids)
    pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_length=1024,
            num_return_sequences=1,
            attention_mask=attention_mask,
            pad_token_id=pad_token_id,
        )

    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text
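

# Usage sketch: a minimal way to exercise subject_gen_func, assuming the
# fine-tuned checkpoint in "gpt2_3epoch" was trained on email-body -> subject
# pairs. The sample email text below is illustrative only, not from the
# original project.
if __name__ == "__main__":
    sample_email = (
        "Hi team, the quarterly report is attached. Please review the revenue "
        "figures before Friday's meeting and send me any corrections."
    )
    print(subject_gen_func(sample_email))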