# File size: 1,122 Bytes
# 0752735 8c3811f 16da06e
from arabert.preprocess import ArabertPreprocessor
from transformers import EncoderDecoderModel, AutoTokenizer
# Hub checkpoint that supplies both the tokenizer and the encoder-decoder
# weights; named once so the two loaders cannot drift apart.
_CHECKPOINT = "tareknaous/bert2bert-empathetic-response-msa"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)

# ``to`` and ``eval`` each return the module itself, so the chain leaves
# ``model`` bound to the loaded network, moved to the GPU and switched to
# evaluation mode.
model = EncoderDecoderModel.from_pretrained(_CHECKPOINT).to("cuda").eval()

# Shared AraBERT preprocessor, configured for "bert-base-arabert" with
# keep_emojis=False.
arabert_prep = ArabertPreprocessor(
    model_name="bert-base-arabert",
    keep_emojis=False,
)
def generate_response(text: str) -> str:
    """Generate a reply to ``text`` with the module-level bert2bert model.

    The input is preprocessed with ``arabert_prep``, tokenized, and passed to
    ``model.generate`` with sampling enabled; the generated sequence is then
    decoded and run through ``arabert_prep.desegment``.

    Args:
        text: Raw user message.

    Returns:
        The generated reply as plain text with the tokenizer's special
        tokens removed. Because ``do_sample=True``, repeated calls with the
        same input may return different replies.
    """
    text_clean = arabert_prep.preprocess(text)
    inputs = tokenizer(text_clean, return_tensors="pt")

    # Follow the model's own device instead of hard-coding "cuda", so the
    # input tensors always land wherever the weights were placed.
    device = model.device
    outputs = model.generate(
        input_ids=inputs.input_ids.to(device),
        attention_mask=inputs.attention_mask.to(device),
        do_sample=True,
        min_length=10,
        top_k=0,  # 0 turns top-k filtering off, leaving only top-p
        top_p=0.9,
        temperature=0.5,
    )

    # Let the tokenizer strip its special tokens rather than scrubbing the
    # repr of a Python list: the old string surgery also deleted every
    # apostrophe in the reply and missed any special token that was not
    # directly adjacent to the list brackets.
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    return str(arabert_prep.desegment(response))