Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,28 @@
|
|
1 |
-
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import importlib
import os
import subprocess
import sys

import torch

# Dependency bootstrap.
# The original lines used IPython "!" shell escapes (`!pip install ...`,
# `!git clone ...`), which are a SyntaxError in a plain Python module.
# Run the same commands through subprocess so the script is importable.
subprocess.run([sys.executable, "-m", "pip", "install", "pyarabic"], check=True)
subprocess.run([sys.executable, "-m", "pip", "install", "farasapy"], check=True)

# `git clone` fails if the destination already exists, so only clone once.
if not os.path.isdir("arabert"):
    subprocess.run(
        ["git", "clone", "https://github.com/aub-mind/arabert"],
        check=True,
    )

# Packages created after interpreter start-up may be missed by the import
# system's directory caches; refresh them before importing.
importlib.invalidate_caches()

from arabert.preprocess import ArabertPreprocessor
from transformers import EncoderDecoderModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("tareknaous/bert2bert-empathetic-response-msa")
model = EncoderDecoderModel.from_pretrained("tareknaous/bert2bert-empathetic-response-msa")

# Use the GPU when one is present, otherwise stay on CPU instead of crashing.
model.to("cuda" if torch.cuda.is_available() else "cpu")
# Inference only: switch off dropout and other train-time behaviour.
model.eval()

arabert_prep = ArabertPreprocessor(model_name="bert-base-arabert", keep_emojis=False)
|
12 |
+
def generate_response(text):
    """Generate a reply to *text* with the module-level encoder-decoder model.

    The input is normalised with ``arabert_prep.preprocess``, tokenised,
    and passed to ``model.generate`` using nucleus sampling. The generated
    ids are decoded and passed through ``arabert_prep.desegment``.

    Args:
        text: The user message to respond to.

    Returns:
        The generated reply as a ``str`` with special tokens removed.
    """
    text_clean = arabert_prep.preprocess(text)
    inputs = tokenizer.encode_plus(text_clean, return_tensors='pt')

    # Send the inputs to whatever device the model lives on rather than
    # assuming "cuda", so the function also works on CPU-only hosts.
    device = model.device

    outputs = model.generate(
        input_ids=inputs.input_ids.to(device),
        attention_mask=inputs.attention_mask.to(device),
        do_sample=True,
        min_length=10,
        top_k=0,  # 0 disables top-k filtering; only top-p is applied
        top_p=0.9,
        temperature=0.5,
    )

    # Let the tokenizer drop special tokens. The previous approach
    # stringified the decoded list and deleted "'", "[[CLS]" and "[SEP]]",
    # which also removed apostrophes from the reply itself and left any
    # other special tokens in place.
    preds = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    response = preds[0]

    response = str(arabert_prep.desegment(response))
    return response
|