azizbarank committed
Commit 44f401b · 1 Parent(s): 47d15d4

Delete app.py

Files changed (1):
  1. app.py +0 -84
app.py DELETED
@@ -1,84 +0,0 @@
import os

# Install runtime dependencies at startup (a requirements.txt is the usual
# alternative). Note the PyPI package is 'scikit-learn'; 'sklearn' is a
# deprecated alias.
os.system('pip install nltk')
os.system('pip install scikit-learn')

import nltk

# downloading the NLTK data needed for tokenization, stop words, and lemmatization
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# importing relevant python packages
import pickle
# preprocessing
import re
import string

import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

# creating page sections
site_header = st.container()
business_context = st.container()
data_desc = st.container()
performance = st.container()
tweet_input = st.container()
model_results = st.container()
sentiment_analysis = st.container()
contact = st.container()

with site_header:
    st.title('Toxic Comment Detection')

with tweet_input:
    st.header('Is Your Tweet Considered Hate Speech?')
    st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
    # user input here
    user_text = st.text_input('Enter Tweet', max_chars=280)

with model_results:
    st.subheader('Prediction:')
    if user_text:
        # processing user_text
        # removing punctuation
        user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
        # tokenizing
        stop_words = set(stopwords.words('english'))
        tokens = nltk.word_tokenize(user_text)
        # removing stop words
        stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
        # lemmatizing each token to its root word
        lemmatizer = WordNetLemmatizer()
        lemmatized_output = [lemmatizer.lemmatize(word) for word in stopwords_removed]

        # instantiating the TF-IDF vectorizer (it expects a list of stop words)
        tfidf = TfidfVectorizer(stop_words=list(stop_words))
        # re-fitting on the training corpus so the tweet is vectorized against
        # the same vocabulary the model was trained on
        with open('X_train.pickle', 'rb') as f:
            X_train = pickle.load(f)
        tfidf.fit(X_train)
        # transform() expects an iterable of documents, so the tokens are
        # joined back into a single string
        X_test_count = tfidf.transform([' '.join(lemmatized_output)])

        # loading in the trained classifier
        with open('final_bayes.pickle', 'rb') as f:
            final_model = pickle.load(f)

        # applying the model to the single vectorized tweet
        prediction = final_model.predict(X_test_count)[0]

        if prediction == 0:
            st.subheader('**Not Hate Speech**')
        else:
            st.subheader('**Hate Speech**')
        st.text('')
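
A side note on the deleted script: re-fitting the TF-IDF vectorizer on the raw training corpus at every request was its main structural weakness. The usual fix is to fit the vectorizer and classifier together at training time and pickle the fitted object, so the app only loads and predicts. Below is a minimal sketch of that training step, under stated assumptions: pipeline.pickle is a hypothetical artifact name, the training data is placeholder, and MultinomialNB is a stand-in guess for whatever classifier produced final_bayes.pickle (the filename merely suggests Naive Bayes).

import pickle

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# Hypothetical training data: raw tweet strings with 0/1 labels
# (0 = not hate speech, 1 = hate speech).
X_train = ["example tweet one", "another example tweet"]
y_train = [0, 1]

# Fit the vectorizer and classifier together so they persist as one object.
model = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('clf', MultinomialNB()),
])
model.fit(X_train, y_train)

# Persist the fitted pipeline; the Streamlit app can then load it once and
# call model.predict([user_text]) without touching the training corpus.
with open('pipeline.pickle', 'wb') as f:
    pickle.dump(model, f)

With that artifact in place, the app-side prediction collapses to loading pipeline.pickle and calling predict on the preprocessed tweet string.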