# import gradio as gr
import tensorflow as tf
import numpy as np
from keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
import pickle
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
from pathlib import Path
import pandas as pd
import plotly.express as px
import keras
import unicodedata as ud
from underthesea import word_tokenize
from phoBERT import BERT_predict
# Load tokenizer (currently disabled)
# fp = Path(__file__).with_name('tokenizer.pkl')
# with open(fp, mode="rb") as f:
#     tokenizer = pickle.load(f)
# Load LSTM
# fp = Path(__file__).with_name('lstm_model.h5')
LSTM_model = tf.keras.models.load_model('lstm_model.tf')
# Load GRU
# fp = Path(__file__).with_name('gru_model.h5')
GRU_model = tf.keras.models.load_model('gru_model.tf')
def tokenizer_pad(tokenizer, comment_text, max_length=200):
    # Word-segment the Vietnamese text, map it to token ids, and pad/truncate
    # to a fixed length so it can be fed to the recurrent models.
    comment_text = word_tokenize(comment_text, format="text")
    comment_text = [comment_text]
    tokenized_text = tokenizer.texts_to_sequences(comment_text)
    padded_sequences = pad_sequences(sequences=tokenized_text, maxlen=max_length, padding="post", truncating="post")
    return padded_sequences
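
# Illustrative usage of tokenizer_pad, assuming the pickled tokenizer above
# were re-enabled (see the commented "Load tokenizer" block). Since 'tokenizer'
# is not loaded in this file, this sketch stays commented out:
# padded = tokenizer_pad(tokenizer, "bình luận thử nghiệm", max_length=200)
# probs = LSTM_model.predict(padded)[0]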
def LSTM_predict(x):
    # x = tokenizer_pad(tokenizer=tokenizer, comment_text=x)
    # The saved model takes the segmented text directly and returns
    # six per-label probabilities, rounded to two decimals.
    pred_proba = LSTM_model.predict([x])[0]
    pred_proba = [round(i, 2) for i in pred_proba]
    return pred_proba
def GRU_predict(x):
    # x = tokenizer_pad(tokenizer=tokenizer, comment_text=x)
    # Same contract as LSTM_predict, using the GRU model.
    pred_proba = GRU_model.predict([x])[0]
    pred_proba = [round(i, 2) for i in pred_proba]
    return pred_proba
def plot(result):
    # Bar chart of the six toxicity scores. Labels (Vietnamese):
    # toxic, very toxic, obscene, threat, insult, identity hate.
    label = ['độc hại', 'cực kì độc hại', 'tục tĩu', 'đe dọa', 'xúc phạm', 'thù ghét cá nhân']
    data = pd.DataFrame()
    data['Nhãn'] = label   # label
    data['Điểm'] = result  # score
    p = px.bar(data, x='Nhãn', y='Điểm', color='Nhãn', range_y=[0, 1])
    return p
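
# Minimal sketch of chaining prediction and plotting (assumes the models
# above loaded successfully); the resulting Plotly figure can be shown
# locally or returned to a Gradio output:
# fig = plot(judge("bình luận thử nghiệm"))
# fig.show()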
def judge(x):
    # Average the LSTM and GRU probabilities for the six labels.
    result = []
    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")
    lstm_pred = LSTM_predict(x)
    gru_pred = GRU_predict(x)
    result_lstm = np.round(lstm_pred, 2)
    result_gru = np.round(gru_pred, 2)
    for i in range(6):
        result.append((result_lstm[i] + result_gru[i]) / 2)
    return result
def judgePlus(x):
    # Ensemble of LSTM, GRU and PhoBERT: prefer the PhoBERT scores when its
    # 'toxic' score exceeds the LSTM/GRU average, otherwise fall back to
    # the LSTM/GRU average.
    result = []
    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")
    lstm_pred = LSTM_predict(x)
    gru_pred = GRU_predict(x)
    try:
        bert_pred = BERT_predict(x)
    except Exception:
        # If the PhoBERT call fails, substitute the LSTM/GRU average.
        bert_pred = np.average([lstm_pred, gru_pred], axis=0)
    result_lstm = np.round(lstm_pred, 2)
    result_gru = np.round(gru_pred, 2)
    result_bert = np.round(bert_pred, 2)
    # (lstm + gru) < bert * 2  is equivalent to  avg(lstm, gru) < bert
    # on the first ('toxic') label.
    if (result_lstm[0] + result_gru[0]) < (result_bert[0] * 2):
        for i in range(6):
            result.append(result_bert[i])
    else:
        for i in range(6):
            result.append((result_lstm[i] + result_gru[i]) / 2)
    return result
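
# Worked example of the gating rule above, with made-up scores:
# LSTM toxic = 0.80, GRU toxic = 0.60 -> average 0.70; PhoBERT toxic = 0.75.
# Then 0.80 + 0.60 = 1.40 < 1.50 = 0.75 * 2, so judgePlus returns the
# PhoBERT scores for all six labels.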
def judgeBert(x):
    # PhoBERT-only prediction; returns all zeros if the model call fails.
    result = []
    x = ud.normalize('NFKC', x)
    x = word_tokenize(x, format="text")
    try:
        bert_pred = BERT_predict(x)
    except Exception:
        bert_pred = np.zeros(6, dtype=float)
    result_bert = np.round(bert_pred, 2)
    for i in range(6):
        result.append(result_bert[i])
    return result
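
# Minimal sketch of serving the ensemble through Gradio (the 'import gradio'
# at the top of this file is commented out). The Textbox/Plot wiring below is
# an assumption about how the Space exposes these functions, not taken
# verbatim from this file.
if __name__ == "__main__":
    import gradio as gr

    demo = gr.Interface(
        fn=lambda text: plot(judgePlus(text)),  # score the comment, then chart it
        inputs=gr.Textbox(label="Bình luận"),   # comment to classify
        outputs=gr.Plot(label="Điểm độc hại"),  # per-label toxicity scores
    )
    demo.launch()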