azizbarank committed on
Commit
e6ecb8f
·
1 Parent(s): 44f401b

Upload python.py

Browse files
Files changed (1) hide show
  1. python.py +93 -0
python.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""PYTHON.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1NcMpq7CN_j3cSirY034F_NcEvpzYgnZ4
"""

# --- Environment setup -------------------------------------------------------
# Install runtime dependencies into the *current* interpreter's environment.
import os
import sys

# Use "python -m pip" so the install targets the interpreter actually running
# this script. NOTE: the PyPI package is 'scikit-learn'; the old alias
# 'sklearn' is deprecated and rejected by modern pip.
os.system(f'{sys.executable} -m pip install nltk')
os.system(f'{sys.executable} -m pip install scikit-learn')

import nltk

# Download the NLTK data this app relies on:
#   punkt      -> word_tokenize
#   stopwords  -> English stop-word list
#   wordnet    -> WordNetLemmatizer
#   omw-1.4    -> Open Multilingual WordNet (required by wordnet in NLTK >= 3.6)
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
21
+ # importing relevant python packages
22
+ import streamlit as st
23
+ import pandas as pd
24
+ import numpy as np
25
+ import pickle
26
+ from PIL import Image
27
+ # preprocessing
28
+ import re
29
+ import string
30
+ import nltk
31
+ from nltk.corpus import stopwords
32
+ from nltk.stem import WordNetLemmatizer
33
+ from sklearn.feature_extraction.text import TfidfVectorizer
34
+ # modeling
35
+ from sklearn import svm
36
+ # sentiment analysis
37
+
38
+
39
# Page layout: one Streamlit container per logical section of the app.
(site_header, business_context, data_desc, performance,
 tweet_input, model_results, sentiment_analysis, contact) = (
    st.container() for _ in range(8)
)

with site_header:
    st.title('Toxic Comment Detection')


with tweet_input:
    st.header('Is Your Tweet Considered Hate Speech?')
    st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
    # Free-text tweet box, capped at Twitter's 280-character limit.
    user_text = st.text_input('Enter Tweet', max_chars=280)
59
with model_results:
    st.subheader('Prediction:')
    if user_text:
        # --- Preprocess the tweet the same way the training corpus was. ---
        # Strip all punctuation characters.
        cleaned_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
        # Tokenize and drop English stop words (case-insensitive comparison).
        stop_words = set(stopwords.words('english'))
        tokens = nltk.word_tokenize(cleaned_text)
        stopwords_removed = [token.lower() for token in tokens
                             if token.lower() not in stop_words]
        # Reduce each surviving token to its lemma.
        lemmatizer = WordNetLemmatizer()
        lemmatized_output = [lemmatizer.lemmatize(word) for word in stopwords_removed]

        # --- Vectorize. ---
        # BUG FIX: the tweet must be vectorized as ONE document. Previously
        # each token was passed as its own document and only the first token
        # was ever scored by the classifier.
        X_test = [' '.join(lemmatized_output)]
        # scikit-learn requires stop_words to be a list, not a set.
        tfidf = TfidfVectorizer(stop_words=list(stop_words))
        # Refit the vectorizer on the persisted training corpus so the
        # vocabulary matches what the model was trained on.
        # NOTE(review): unpickling is only safe because these are local,
        # trusted artifacts — never pickle.load untrusted data.
        with open('X_train.pickle', 'rb') as train_file:
            X_train = pickle.load(train_file)
        X_train_count = tfidf.fit_transform(X_train)
        X_test_count = tfidf.transform(X_test)

        # --- Load the persisted classifier and score the tweet. ---
        with open('final_bayes.pickle', 'rb') as model_file:
            final_model = pickle.load(model_file)
        prediction = final_model.predict(X_test_count)

        # Label 0 means the model considers the text benign; any other label
        # is surfaced as hate speech. Index [0] extracts the scalar from the
        # returned array instead of comparing the array itself.
        if prediction[0] == 0:
            st.subheader('**Not Hate Speech**')
        else:
            st.subheader('**Hate Speech**')
        st.text('')