azizbarank committed on
Commit b11a3d9 · 1 Parent(s): 4002604

Delete app

Files changed (1)
  1. app +0 -85
app DELETED
@@ -1,85 +0,0 @@
- import os
- os.system('pip install nltk')
- os.system('pip install scikit-learn')  # the pip package is 'scikit-learn', not 'sklearn'
-
- import nltk
-
- nltk.download('punkt')
- nltk.download('stopwords')
- nltk.download('wordnet')
- nltk.download('omw-1.4')
-
- # importing relevant python packages
- import streamlit as st
- import joblib
- # preprocessing
- import re
- import string
- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
- from sklearn.feature_extraction.text import TfidfVectorizer
-
- # creating page sections
- site_header = st.container()
- business_context = st.container()
- data_desc = st.container()
- performance = st.container()
- tweet_input = st.container()
- model_results = st.container()
- sentiment_analysis = st.container()
- contact = st.container()
-
- with site_header:
-     st.title('Toxic Comment Detection')
-
- with tweet_input:
-     st.header('Is Your Tweet Considered Hate Speech?')
-     st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
-     # user input here
-     user_text = st.text_input('Enter Tweet', max_chars=280)  # setting input as user_text
-
- with model_results:
-     st.subheader('Prediction:')
-     if user_text:
-         # processing user_text: removing punctuation
-         user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
-         # tokenizing
-         stop_words = set(stopwords.words('english'))
-         tokens = nltk.word_tokenize(user_text)
-         # removing stop words
-         stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
-         # lemmatizing to take the root word
-         lemmatizer = WordNetLemmatizer()
-         lemmatized_output = [lemmatizer.lemmatize(word) for word in stopwords_removed]
-
-         # fitting the TF-IDF vectorizer on the training corpus, then transforming
-         # the user input (refit on every rerun; persisting the fitted vectorizer
-         # alongside the model would be cheaper)
-         tfidf = TfidfVectorizer(stop_words=list(stop_words))
-         X_train = joblib.load(open('X_train.pickle', 'rb'))
-         X_test = [' '.join(lemmatized_output)]  # one document, not one document per token
-         X_train_count = tfidf.fit_transform(X_train)
-         X_test_count = tfidf.transform(X_test)
-
-         # loading in the trained model
-         final_model = joblib.load(open('final_bayes.pickle', 'rb'))
-
-         # applying the model to make a prediction on the user input
-         prediction = final_model.predict(X_test_count)[0]
-
-         if prediction == 0:
-             st.subheader('**Not Hate Speech**')
-         else:
-             st.subheader('**Hate Speech**')
-         st.text('')
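
For reference, a minimal sketch of how the deleted app's prediction step could avoid refitting the TF-IDF vectorizer on every Streamlit rerun: fit and save the vectorizer once at training time, then load both artifacts in the app. The file name 'tfidf.pickle' and the one-off training snippet are assumptions for illustration; only 'final_bayes.pickle' and 'X_train.pickle' exist in this repo's history.

    import joblib

    # --- at training time (hypothetical one-off script) ---
    # tfidf = TfidfVectorizer(stop_words='english')
    # X_train_count = tfidf.fit_transform(X_train)
    # model.fit(X_train_count, y_train)
    # joblib.dump(tfidf, 'tfidf.pickle')        # assumed artifact, not in this repo
    # joblib.dump(model, 'final_bayes.pickle')

    # --- at inference time (in the app) ---
    def predict_label(user_text: str) -> int:
        """Return the model's class label for a single cleaned tweet."""
        tfidf = joblib.load('tfidf.pickle')        # already-fitted vectorizer
        model = joblib.load('final_bayes.pickle')  # trained classifier
        features = tfidf.transform([user_text])    # one document -> one row
        return int(model.predict(features)[0])

This keeps train-time and inference-time features consistent (same vocabulary and IDF weights) and drops the need to ship the raw X_train corpus with the app.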