Spaces:
Running
Running
Update phoBERT.py
Browse files
- phoBERT.py +12 -9
phoBERT.py
CHANGED
@@ -2,6 +2,7 @@ import torch
|
|
2 |
from transformers import AutoModel, AutoTokenizer
|
3 |
from underthesea import word_tokenize
|
4 |
import __main__
|
|
|
5 |
|
6 |
|
7 |
#phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
@@ -69,7 +70,7 @@ def tokenize(data):
|
|
69 |
return output
|
70 |
|
71 |
def BERT_predict(text):
|
72 |
-
|
73 |
text = [text]
|
74 |
token = tokenize(text)
|
75 |
|
@@ -79,13 +80,15 @@ def BERT_predict(text):
|
|
79 |
|
80 |
result = model(ids, mask, token_type_ids)
|
81 |
# print(result)
|
|
|
|
|
82 |
return result.tolist()[0]
|
83 |
|
84 |
-
print(BERT_predict("xin chaof"))
|
85 |
-
print(BERT_predict("con chó"))
|
86 |
-
print(BERT_predict("đồ chó"))
|
87 |
-
print(BERT_predict("đồ ngu"))
|
88 |
-
print(BERT_predict("cái lồn"))
|
89 |
-
print(BERT_predict("óc chó"))
|
90 |
-
print(BERT_predict("đồ chó đẻ"))
|
91 |
-
print(BERT_predict("con đĩ"))
|
|
|
2 |
from transformers import AutoModel, AutoTokenizer
|
3 |
from underthesea import word_tokenize
|
4 |
import __main__
|
5 |
+
import time
|
6 |
|
7 |
|
8 |
#phobert = AutoModel.from_pretrained("vinai/phobert-base")
|
|
|
70 |
return output
|
71 |
|
72 |
def BERT_predict(text):
|
73 |
+
t1 = time.time()
|
74 |
text = [text]
|
75 |
token = tokenize(text)
|
76 |
|
|
|
80 |
|
81 |
result = model(ids, mask, token_type_ids)
|
82 |
# print(result)
|
83 |
+
t2 = time.time()
|
84 |
+
print(f'phoBERT: {t2-t1}s')
|
85 |
return result.tolist()[0]
|
86 |
|
87 |
+
# print(BERT_predict("xin chaof"))
|
88 |
+
# print(BERT_predict("con chó"))
|
89 |
+
# print(BERT_predict("đồ chó"))
|
90 |
+
# print(BERT_predict("đồ ngu"))
|
91 |
+
# print(BERT_predict("cái lồn"))
|
92 |
+
# print(BERT_predict("óc chó"))
|
93 |
+
# print(BERT_predict("đồ chó đẻ"))
|
94 |
+
# print(BERT_predict("con đĩ"))
|