File size: 1,122 Bytes
0752735
8c3811f
16da06e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26


import torch
from arabert.preprocess import ArabertPreprocessor
from transformers import EncoderDecoderModel, AutoTokenizer
# Hugging Face Hub checkpoint used for both the tokenizer and the
# encoder-decoder model.
MODEL_NAME = "tareknaous/bert2bert-empathetic-response-msa"

# Prefer the GPU, but fall back to the CPU so that importing this module does
# not crash on machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = EncoderDecoderModel.from_pretrained(MODEL_NAME)
model.to(DEVICE)
model.eval()  # evaluation mode: this script only runs inference

# Preprocessor applied to user text before tokenization; emojis are dropped.
arabert_prep = ArabertPreprocessor(model_name="bert-base-arabert", keep_emojis=False)
def generate_response(text: str) -> str:
    """Generate a sampled response to *text* with the module-level model.

    The input is cleaned with ``arabert_prep.preprocess``, tokenized, and fed
    to ``model.generate`` using nucleus sampling (``top_p=0.9``, ``top_k=0``,
    ``temperature=0.5``, ``min_length=10``). The single generated sequence is
    decoded without special tokens and passed through
    ``arabert_prep.desegment``.

    Args:
        text: Raw user utterance.

    Returns:
        The generated response as a plain string. Because sampling is
        enabled, repeated calls with the same input may return different
        responses.
    """
    text_clean = arabert_prep.preprocess(text)
    inputs = tokenizer.encode_plus(text_clean, return_tensors='pt')

    # Follow the model's actual device instead of hard-coding "cuda", so the
    # function works wherever the model was placed.
    device = model.device
    outputs = model.generate(
        input_ids=inputs.input_ids.to(device),
        attention_mask=inputs.attention_mask.to(device),
        do_sample=True,
        min_length=10,
        top_k=0,
        top_p=0.9,
        temperature=0.5,
    )

    # Decode the single generated sequence directly and let the tokenizer
    # drop special tokens. Stringifying the decoded list and stripping repr
    # artifacts would remove legitimate apostrophes and leak escape
    # sequences into the response.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return str(arabert_prep.desegment(response))