jclge committed on
Commit
508a685
·
1 Parent(s): 1da93ec

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -0
app.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import pickle as pkl
3
+ from sklearn.preprocessing import StandardScaler
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.dummy import DummyClassifier
6
+ from sklearn.feature_extraction.text import CountVectorizer
7
+ from sklearn.linear_model import Perceptron
8
+ from numpy import reshape
9
+ import numpy as np
10
+ from sklearn.metrics import accuracy_score
11
+ from sklearn.metrics import classification_report
12
+ from sklearn.naive_bayes import GaussianNB
13
+ from sklearn.neighbors import KNeighborsClassifier
14
+ from sklearn.linear_model import Perceptron
15
+ from sklearn.dummy import DummyClassifier
16
+ from sklearn.ensemble import RandomForestClassifier
17
+ from sklearn.neural_network import MLPClassifier
18
+ from sklearn import svm
19
+ import gradio as gr
20
+
21
class NLP:
    """Serve pre-trained review classifiers behind one dispatch interface.

    On construction the class unpickles two vectorizers, two held-out test
    sets (polarity and rating) and one polarity/rating model pair per
    algorithm from ``models/``, and scores every loaded model on its test
    set.  ``Manage`` then routes a vectorized review to the evaluator
    selected by model name and dataset.

    Every ``*_eval`` method returns ``(probabilities, score)`` where
    ``probabilities`` is a nested list with one row and ``score`` is the
    model's test-set score rendered as a string.
    """

    def __init__(self) -> None:
        self.__path = "models/"
        # UI model name -> [polarity ("Binary") evaluator, rating evaluator].
        self.__exec = {
            "Perceptron": [self.perceptron_pol_eval, self.perceptron_rat_eval],
            "K-Neighbors": [self.kneighbors_pol_eval, self.kneighbors_rat_eval],
            "Naive Bayes": [self.NB_pol_eval, self.NB_rat_eval],
            "SVM": [self.SVM_pol_eval, self.SVM_rat_eval],
            "Random Forest": [self.RF_pol_eval, self.RF_rat_eval],
            "NN (MLP)": [self.MLP_pol_eval, self.MLP_rat_eval],
            "Dummy (Baseline)": [self.Dummy_pol_eval, self.Dummy_rat_eval],
        }
        self.__get_vocabulary()
        self.__vectorizer_pol = self.__load("vectorizer_pol.pkl")
        self.__vectorizer_rat = self.__load("vectorizer_rat.pkl")
        self.__X_pol_test = self.__load("X_pol_test.pkl")
        self.__y_pol_test = self.__load("y_pol_test.pkl")
        self.__X_rat_test = self.__load("X_rat_test.pkl")
        self.__y_rat_test = self.__load("y_rat_test.pkl")
        self.__get_models()

    def __load(self, filename):
        """Unpickle ``filename`` from the models directory and close the file."""
        # NOTE: unpickling executes arbitrary code; these files must come from
        # a trusted source (they are expected to ship with the application).
        with open(self.__path + filename, 'rb') as handle:
            return pkl.load(handle)

    def __load_scored(self, stem, X_test, y_test):
        """Load ``<stem>.pkl`` and return ``(model, model.score(X_test, y_test))``."""
        model = self.__load(stem + ".pkl")
        return model, model.score(X_test, y_test)

    def __get_models(self):
        """Load every model pair and cache its score on the matching test set."""
        pol = (self.__X_pol_test, self.__y_pol_test)
        rat = (self.__X_rat_test, self.__y_rat_test)

        self.__perceptron_pol, self.__perceptron_pol_score = self.__load_scored("perceptron_pol", *pol)
        self.__perceptron_rat, self.__perceptron_rat_score = self.__load_scored("perceptron_rat", *rat)

        self.__rf_pol, self.__rf_pol_score = self.__load_scored("rf_pol", *pol)
        self.__rf_rat, self.__rf_rat_score = self.__load_scored("rf_rat", *rat)

        self.__nb_pol, self.__nb_pol_score = self.__load_scored("nb_pol", *pol)
        self.__nb_rat, self.__nb_rat_score = self.__load_scored("nb_rat", *rat)

        # Loading of svm_pol.pkl / svm_rat.pkl was disabled in the original
        # code.  The attributes are still defined so the SVM evaluators can
        # report a clear error instead of failing on a missing attribute.
        self.__svm_pol = self.__svm_pol_score = None
        self.__svm_rat = self.__svm_rat_score = None

        self.__k_neighbors_pol, self.__k_neighbors_pol_score = self.__load_scored("kneighbors_pol", *pol)
        self.__k_neighbors_rat, self.__k_neighbors_rat_score = self.__load_scored("kneighbors_rat", *rat)

        self.__dummy_pol, self.__dummy_pol_score = self.__load_scored("dummy_pol", *pol)
        self.__dummy_rat, self.__dummy_rat_score = self.__load_scored("dummy_rat", *rat)

        self.__clf_pol, self.__clf_pol_score = self.__load_scored("clf_pol", *pol)
        self.__clf_rat, self.__clf_rat_score = self.__load_scored("clf_rat", *rat)

    @staticmethod
    def __proba_eval(model, score, evalu):
        """Return ``(model.predict_proba(evalu) as nested list, str(score))``."""
        return (model.predict_proba(evalu).tolist(), str(score))

    def perceptron_pol_eval(self, evalu):
        """Return the hard polarity prediction as ``[[label, 1 - label]]``.

        This evaluator uses ``predict`` rather than ``predict_proba``, so the
        row holds the predicted label and its complement.
        NOTE(review): assumes labels are 0/1 with 1 meaning positive - confirm.
        """
        label = self.__perceptron_pol.predict(evalu)[0]
        return ([[label, 1 - label]], str(self.__perceptron_pol_score))

    def perceptron_rat_eval(self, evalu):
        """Return the hard rating prediction as a one-hot row.

        Columns are ordered as ratings 1, 2, 4, 5; any predicted label other
        than 5, 4 or 2 is reported in the first column.
        """
        label = self.__perceptron_rat.predict(evalu)[0]
        if label == 5:
            one_hot = [[0, 0, 0, 1]]
        elif label == 4:
            one_hot = [[0, 0, 1, 0]]
        elif label == 2:
            one_hot = [[0, 1, 0, 0]]
        else:
            one_hot = [[1, 0, 0, 0]]
        return (one_hot, str(self.__perceptron_rat_score))

    def kneighbors_pol_eval(self, evalu):
        """K-Neighbors class probabilities and score for the polarity task."""
        # Reports the polarity score (the original returned the rating score).
        return self.__proba_eval(self.__k_neighbors_pol, self.__k_neighbors_pol_score, evalu)

    def kneighbors_rat_eval(self, evalu):
        """K-Neighbors class probabilities and score for the rating task."""
        return self.__proba_eval(self.__k_neighbors_rat, self.__k_neighbors_rat_score, evalu)

    def NB_pol_eval(self, evalu):
        """Naive Bayes class probabilities and score for the polarity task."""
        return self.__proba_eval(self.__nb_pol, self.__nb_pol_score, evalu)

    def NB_rat_eval(self, evalu):
        """Naive Bayes class probabilities and score for the rating task."""
        return self.__proba_eval(self.__nb_rat, self.__nb_rat_score, evalu)

    def SVM_pol_eval(self, evalu):
        """SVM class probabilities and score for the polarity task.

        Raises:
            RuntimeError: if the SVM polarity model has not been loaded.
        """
        if self.__svm_pol is None:
            raise RuntimeError("The SVM polarity model is not loaded.")
        return self.__proba_eval(self.__svm_pol, self.__svm_pol_score, evalu)

    def SVM_rat_eval(self, evalu):
        """SVM class probabilities and score for the rating task.

        Raises:
            RuntimeError: if the SVM rating model has not been loaded.
        """
        if self.__svm_rat is None:
            raise RuntimeError("The SVM rating model is not loaded.")
        return self.__proba_eval(self.__svm_rat, self.__svm_rat_score, evalu)

    def RF_pol_eval(self, evalu):
        """Random Forest class probabilities and score for the polarity task."""
        return self.__proba_eval(self.__rf_pol, self.__rf_pol_score, evalu)

    def RF_rat_eval(self, evalu):
        """Random Forest class probabilities and score for the rating task."""
        return self.__proba_eval(self.__rf_rat, self.__rf_rat_score, evalu)

    def MLP_pol_eval(self, evalu):
        """MLP class probabilities and score for the polarity task."""
        return self.__proba_eval(self.__clf_pol, self.__clf_pol_score, evalu)

    def MLP_rat_eval(self, evalu):
        """MLP class probabilities and score for the rating task."""
        return self.__proba_eval(self.__clf_rat, self.__clf_rat_score, evalu)

    def Dummy_pol_eval(self, evalu):
        """Baseline class probabilities and score for the polarity task."""
        return self.__proba_eval(self.__dummy_pol, self.__dummy_pol_score, evalu)

    def Dummy_rat_eval(self, evalu):
        """Baseline class probabilities and score for the rating task."""
        # Uses the cached score; the original returned the bound ``score``
        # method, which made ``float(score)`` in ``Manage`` fail.
        return self.__proba_eval(self.__dummy_rat, self.__dummy_rat_score, evalu)

    def __get_vocabulary(self):
        """Read the polarity vocabulary file into a de-duplicated list."""
        with open("dataset/vocabulary_polarity.txt", "r") as o:
            res = o.read()
        self.__vocabulary = list(set(res.split("\n")))

    def Tokenizer(self, text):
        """Vectorize ``text`` with the polarity vectorizer and return a dense array.

        NOTE(review): the rating vectorizer is loaded in ``__init__`` but is
        never used here, so rating models also receive polarity features -
        confirm this is intended.
        """
        return self.__vectorizer_pol.transform([text]).toarray()

    def Manage(self, model, Dataset, review):
        """Run the selected model on ``review`` and format the result.

        Args:
            model: key of the evaluator table (e.g. ``"Perceptron"``).
            Dataset: ``"Binary"`` selects the polarity evaluator; any other
                value selects the rating evaluator.
            review: vectorized review passed straight to the evaluator.

        Returns:
            ``(DataFrame, text)``: a one-row frame of per-class values indexed
            by ``"Prediction"``, and a summary of model, dataset and accuracy
            (score multiplied by 100).

        Raises:
            KeyError: if ``model`` is not a known model name.
        """
        if Dataset == "Binary":
            percent, score = self.__exec[model][0](review)
            row = percent[0]
            res = pd.DataFrame({'Positive': row[0], 'Negative': row[1]}, index=["Prediction"])
        else:
            percent, score = self.__exec[model][1](review)
            row = percent[0]
            res = pd.DataFrame(
                {'Rated 1/5': row[0], 'Rated 2/5': row[1], 'Rated 4/5': row[2], 'Rated 5/5': row[3]},
                index=["Prediction"],
            )

        return (res, f"Model: {model}\nDataset: {Dataset}\nAccuracy: {str(float(score)*100)}")
143
+
144
+
145
if __name__ == "__main__":
    class Execution:
        """Adapter that exposes the NLP backend as a Gradio callback."""

        def __init__(self):
            # Building NLP loads and scores all models once, at start-up.
            self.__n = NLP()

        def greet(self, Model, Dataset, Review):
            """Vectorize ``Review`` and return NLP.Manage's (table, text) result."""
            features = self.__n.Tokenizer(Review)
            return self.__n.Manage(Model, Dataset, features)

    e = Execution()
    # Inputs: model selector, dataset selector, free-text review.
    # Outputs: prediction table and a textual summary.
    gr.Interface(
        e.greet,
        [
            gr.inputs.Dropdown(
                [
                    "Perceptron",
                    "K-Neighbors",
                    "Naive Bayes",
                    "SVM",
                    "Random Forest",
                    "NN (MLP)",
                    "Dummy (Baseline)",
                ]
            ),
            gr.inputs.Dropdown(["Binary", "Rating"]),
            "text",
        ],
        [gr.outputs.Dataframe(), "text"],
    ).launch()