Vrooh933 committed on
Commit
4aaa7da
·
1 Parent(s): 4c38a42

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import re
4
+ import string
5
+ from nltk.corpus import stopwords
6
+ from nltk.tokenize import word_tokenize
7
+ from nltk.stem import PorterStemmer, WordNetLemmatizer
8
+ import nltk
9
+ from keras.models import load_model
10
# Fetch the NLTK data used by the preprocessing pipeline.
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK
# releases (3.9+) make word_tokenize load its Punkt data from 'punkt_tab';
# with only 'punkt' installed, tokenization raises LookupError there.
# Downloading both keeps older and newer NLTK versions working.
for _resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)

# English stopwords as a set, for O(1) membership tests during filtering.
stop_words = set(stopwords.words('english'))

# model initiation
# NOTE(review): xgboost is never referenced by name in this file; presumably
# it is imported so joblib can unpickle an estimator that depends on it --
# confirm before removing.
import xgboost

# NOTE: joblib.load unpickles arbitrary Python objects; only load artifact
# files that come from a trusted source.
# `cv` is used as the text vectorizer (cv.transform) and `model` as the
# classifier (model.predict_proba) by the prediction function in this file.
cv = joblib.load('finalized_model.sav')
model = joblib.load('BestModels/best_rf.sav')
22
+
23
def preprocess_text(text):
    """
    Normalize raw tweet text into a space-joined string of processed tokens.

    Steps, in order: lowercase; drop URLs; replace '****' with the
    placeholder token 'gaali'; strip ASCII punctuation; attempt to drop
    @mentions and '#' (see note in the body); delete every character outside
    the printable ASCII range; tokenize; remove English stopwords;
    Porter-stem; lemmatize each stem as an adjective.

    Args:
        text: Raw input text. ``None`` or an empty string is accepted.

    Returns:
        The processed tokens joined by single spaces, or ``""`` when
        ``text`` is ``None`` or empty.
    """
    # A cleared/absent input would otherwise crash on text.lower(); an empty
    # input yields an empty result either way, so short-circuit here.
    if not text:
        return ""

    # convert all text to lower case
    text = text.lower()

    # remove any urls
    text = re.sub(r'http\S+|www\S+|https\S+', "", text, flags=re.MULTILINE)

    # replace masked profanity '****' with the placeholder token 'gaali'
    text = re.sub(r'\*\*\*\*', "gaali", text)

    # remove punctuations
    text = text.translate(str.maketrans("", "", string.punctuation))

    # remove user @ references and hashtags
    # NOTE(review): '@' and '#' are in string.punctuation and were already
    # stripped by the previous step, so this pattern can no longer match and
    # the user names themselves stay in the text. The order is kept as-is
    # because the saved vectorizer was presumably fit on text produced by
    # this exact pipeline -- confirm before moving this step above the
    # punctuation removal.
    text = re.sub(r'\@\w+|\#', "", text)

    # remove every character outside ' '..'~' (non-ASCII, and also control
    # characters such as newlines and tabs, which are deleted, not replaced)
    text = re.sub(r'[^ -~]', '', text)

    # remove stopwords
    tweet_tokens = word_tokenize(text)
    filtered_words = [word for word in tweet_tokens if word not in stop_words]

    # stemming
    ps = PorterStemmer()
    stemmed_words = [ps.stem(w) for w in filtered_words]

    # lemmatizing (each stem is treated as an adjective: pos='a')
    lemmatizer = WordNetLemmatizer()
    lemma_words = [lemmatizer.lemmatize(w, pos='a') for w in stemmed_words]

    return ' '.join(lemma_words)
59
+
60
def sentiment_analysis(text):
    """
    Score a piece of text against the six output categories.

    The text is cleaned with ``preprocess_text``, vectorized with the
    module-level ``cv`` and passed to ``model.predict_proba``; the first
    (only) row of probabilities is mapped onto fixed category names.

    Args:
        text: Raw input text.

    Returns:
        dict mapping each category name to its probability as a ``float``.
    """
    # Position i in the probability row is reported under class_names[i].
    # NOTE(review): this assumes the model's class order matches the order
    # below -- verify against model.classes_.
    class_names = (
        "not_cyberbullying",
        "gender",
        "religion",
        "age",
        "ethnicity",
        "other_cyberbullying",
    )

    features = cv.transform([preprocess_text(text)])
    probabilities = model.predict_proba(features)[0]

    return {
        name: float(probabilities[idx])
        for idx, name in enumerate(class_names)
    }
75
+
76
+
77
# Gradio UI: a two-line textbox feeds sentiment_analysis, and the returned
# {category: probability} dict is rendered by a Label component.
text_input = gr.Textbox(label="Input here", lines=2, placeholder="Input your text")
label_output = gr.Label(label="Sentiment Analysis")

intfc = gr.Interface(
    fn=sentiment_analysis,
    inputs=text_input,
    outputs=label_output,
)

# share=True asks Gradio to also create a public share link for the app.
intfc.launch(share=True)