azizbarank committed
Commit 5217c73 · 1 Parent(s): e6ecb8f

Delete python.py

Files changed (1)
  1. python.py  +0 -93
python.py DELETED
@@ -1,93 +0,0 @@
- # -*- coding: utf-8 -*-
- """PYTHON.ipynb
-
- Automatically generated by Colaboratory.
-
- Original file is located at
-     https://colab.research.google.com/drive/1NcMpq7CN_j3cSirY034F_NcEvpzYgnZ4
- """
-
- import os
- os.system('pip install nltk')
- os.system('pip install sklearn')
-
- import nltk
-
- nltk.download('punkt')
- nltk.download('stopwords')
- nltk.download('wordnet')
- nltk.download('omw-1.4')
-
- # importing relevant Python packages
- import streamlit as st
- import pandas as pd
- import numpy as np
- import pickle
- from PIL import Image
- # preprocessing
- import re
- import string
- import nltk
- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
- from sklearn.feature_extraction.text import TfidfVectorizer
- # modeling
- from sklearn import svm
- # sentiment analysis
-
-
- # creating page sections
- site_header = st.container()
- business_context = st.container()
- data_desc = st.container()
- performance = st.container()
- tweet_input = st.container()
- model_results = st.container()
- sentiment_analysis = st.container()
- contact = st.container()
-
- with site_header:
-     st.title('Toxic Comment Detection')
-
-
- with tweet_input:
-     st.header('Is Your Tweet Considered Hate Speech?')
-     st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
-     # user input here
-     user_text = st.text_input('Enter Tweet', max_chars=280)  # setting input as user_text
-
- with model_results:
-     st.subheader('Prediction:')
-     if user_text:
-         # processing user_text
-         # removing punctuation
-         user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
-         # tokenizing
-         stop_words = set(stopwords.words('english'))
-         tokens = nltk.word_tokenize(user_text)
-         # removing stop words
-         stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
-         # taking the root of each word
-         lemmatizer = WordNetLemmatizer()
-         lemmatized_output = []
-         for word in stopwords_removed:
-             lemmatized_output.append(lemmatizer.lemmatize(word))
-
-         # instantiating the TF-IDF vectorizer
-         tfidf = TfidfVectorizer(stop_words=stop_words)
-         X_train = pickle.load(open('X_train.pickle', 'rb'))
-         X_test = lemmatized_output
-         X_train_count = tfidf.fit_transform(X_train)
-         X_test_count = tfidf.transform(X_test)
-
-         # loading in the model
-         final_model = pickle.load(open('final_bayes.pickle', 'rb'))
-
-         # applying the model to make a prediction
-         prediction = final_model.predict(X_test_count[0])
-
-         if prediction == 0:
-             st.subheader('**Not Hate Speech**')
-         else:
-             st.subheader('**Hate Speech**')
-         st.text('')
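
Note on the deleted prediction logic: `tfidf.transform(X_test)` is called on the token list, so every lemmatized token becomes its own document and `final_model.predict(X_test_count[0])` classifies only the first token. Below is a minimal sketch of the usual fix, assuming the same pickled artifacts the script loads (`X_train.pickle`, `final_bayes.pickle`); the placeholder token list is illustrative, not the script's output.

# Sketch only, not the repository's code: join the lemmatized tokens back into
# ONE document so TF-IDF produces a single row for the classifier.
import pickle
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download('stopwords')

lemmatized_output = ['example', 'tweet', 'token']  # stand-in for the script's preprocessing output

X_train = pickle.load(open('X_train.pickle', 'rb'))          # training corpus pickled by the script
final_model = pickle.load(open('final_bayes.pickle', 'rb'))  # classifier pickled by the script

# same stop list the script used, so the vocabulary matches what the model saw
tfidf = TfidfVectorizer(stop_words=list(stopwords.words('english')))
tfidf.fit(X_train)  # refit at inference time, as the script did

user_doc = ' '.join(lemmatized_output)  # tokens -> one document string
prediction = final_model.predict(tfidf.transform([user_doc]))[0]

Pickling the fitted TfidfVectorizer alongside the model, rather than the raw X_train, would also drop the per-request fit and keep inference deterministic.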