azizbarank committed
Commit 7fdd9ce · Parent: b11a3d9

Create app.py

Files changed (1)
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
+ import os
+ os.system('pip install nltk')
+ os.system('pip install scikit-learn')
+
+ import nltk
+
+ nltk.download('punkt')
+ nltk.download('stopwords')
+ nltk.download('wordnet')
+ nltk.download('omw-1.4')
+
+ # importing relevant python packages
+ import streamlit as st
+ import pandas as pd
+ import numpy as np
+ import pickle
+ import joblib
+ from PIL import Image
+ # preprocessing
+ import re
+ import string
+ from nltk.corpus import stopwords
+ from nltk.stem import WordNetLemmatizer
+ from sklearn.feature_extraction.text import TfidfVectorizer
+ # modeling
+ from sklearn import svm
+ # sentiment analysis
+
+
+ # creating page sections
+ site_header = st.container()
+ business_context = st.container()
+ data_desc = st.container()
+ performance = st.container()
+ tweet_input = st.container()
+ model_results = st.container()
+ sentiment_analysis = st.container()
+ contact = st.container()
+
+ with site_header:
+     st.title('Toxic Comment Detection')
+
+
+ with tweet_input:
+     st.header('Is Your Tweet Considered Hate Speech?')
+     st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
+     # user input here
+     user_text = st.text_input('Enter Tweet', max_chars=280)  # setting input as user_text
+
+ with model_results:
+     st.subheader('Prediction:')
+     if user_text:
+         # processing user_text
+         # removing punctuation
+         user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
+         # tokenizing
+         stop_words = set(stopwords.words('english'))
+         tokens = nltk.word_tokenize(user_text)
+         # removing stop words
+         stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
+         # taking root word
+         lemmatizer = WordNetLemmatizer()
+         lemmatized_output = []
+         for word in stopwords_removed:
+             lemmatized_output.append(lemmatizer.lemmatize(word))
+
+         # instantiating the TF-IDF vectorizer (sklearn expects a list of stop words)
+         tfidf = TfidfVectorizer(stop_words=list(stop_words))
+         X_train = joblib.load(open('X_train.pickle', 'rb'))
+         # the vectorizer expects documents (strings), so join the processed tokens back into one document
+         X_test = [' '.join(lemmatized_output)]
+         X_train_count = tfidf.fit_transform(X_train)
+         X_test_count = tfidf.transform(X_test)
+
+         # loading in model
+         final_model = joblib.load(open('final_bayes.pickle', 'rb'))
+
+         # apply model to make predictions
+         prediction = final_model.predict(X_test_count)[0]
+
+         if prediction == 0:
+             st.subheader('**Not Hate Speech**')
+         else:
+             st.subheader('**Hate Speech**')
+         st.text('')
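
Note: the app re-fits the TfidfVectorizer on X_train.pickle at every prediction, and although it imports sklearn.svm, the model it loads is named final_bayes.pickle. A more typical pattern is to fit and persist the vectorizer once at training time and have the app only load and apply it. Below is a minimal sketch of that training-side script, assuming a naive Bayes classifier (to match the pickle's name) and a y_train.pickle label file; the script name and the tfidf.pickle / y_train.pickle file names are illustrative assumptions, not files from this commit.

# train_and_persist.py -- hypothetical companion training script;
# y_train.pickle and tfidf.pickle are assumed names, not part of this commit
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

X_train = joblib.load('X_train.pickle')   # preprocessed training documents
y_train = joblib.load('y_train.pickle')   # matching labels (assumed to exist)

# fit the vectorizer once, on the training corpus only
tfidf = TfidfVectorizer()
X_train_count = tfidf.fit_transform(X_train)

# train the classifier and persist both artifacts for the app
model = MultinomialNB().fit(X_train_count, y_train)
joblib.dump(tfidf, 'tfidf.pickle')
joblib.dump(model, 'final_bayes.pickle')

With that in place, the app could replace its fit_transform block with tfidf = joblib.load('tfidf.pickle') and X_test_count = tfidf.transform([' '.join(lemmatized_output)]), so prediction-time features are built from exactly the training-time vocabulary.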