azizbarank committed
Commit 5217c73 · 1 Parent(s): e6ecb8f

Delete python.py

Files changed (1)
  1. python.py  +0 -93
python.py DELETED
@@ -1,93 +0,0 @@
- # -*- coding: utf-8 -*-
- """PYTHON.ipynb
-
- Automatically generated by Colaboratory.
-
- Original file is located at
-     https://colab.research.google.com/drive/1NcMpq7CN_j3cSirY034F_NcEvpzYgnZ4
- """
-
- import os
- os.system('pip install nltk')
- os.system('pip install sklearn')
-
- import nltk
-
- nltk.download('punkt')
- nltk.download('stopwords')
- nltk.download('wordnet')
- nltk.download('omw-1.4')
-
- # importing relevant Python packages
- import streamlit as st
- import pandas as pd
- import numpy as np
- import pickle
- from PIL import Image
- # preprocessing
- import re
- import string
- import nltk
- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
- from sklearn.feature_extraction.text import TfidfVectorizer
- # modeling
- from sklearn import svm
- # sentiment analysis
-
-
- # creating page sections
- site_header = st.container()
- business_context = st.container()
- data_desc = st.container()
- performance = st.container()
- tweet_input = st.container()
- model_results = st.container()
- sentiment_analysis = st.container()
- contact = st.container()
-
- with site_header:
-     st.title('Toxic Comment Detection')
-
-
- with tweet_input:
-     st.header('Is Your Tweet Considered Hate Speech?')
-     st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
-     # user input here
-     user_text = st.text_input('Enter Tweet', max_chars=280)  # setting input as user_text
-
- with model_results:
-     st.subheader('Prediction:')
-     if user_text:
-         # processing user_text
-         # removing punctuation
-         user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
-         # tokenizing
-         stop_words = set(stopwords.words('english'))
-         tokens = nltk.word_tokenize(user_text)
-         # removing stop words
-         stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
-         # taking the root of each word
-         lemmatizer = WordNetLemmatizer()
-         lemmatized_output = []
-         for word in stopwords_removed:
-             lemmatized_output.append(lemmatizer.lemmatize(word))
-
-         # instantiating the TF-IDF vectorizer
-         tfidf = TfidfVectorizer(stop_words=stop_words)
-         X_train = pickle.load(open('X_train.pickle', 'rb'))
-         X_test = lemmatized_output
-         X_train_count = tfidf.fit_transform(X_train)
-         X_test_count = tfidf.transform(X_test)
-
-         # loading in the model
-         final_model = pickle.load(open('final_bayes.pickle', 'rb'))
-
-         # applying the model to make a prediction
-         prediction = final_model.predict(X_test_count[0])
-
-         if prediction == 0:
-             st.subheader('**Not Hate Speech**')
-         else:
-             st.subheader('**Hate Speech**')
-         st.text('')
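
Note on the deleted prediction logic: `tfidf.transform(X_test)` is called on the token list, so every lemmatized token becomes its own document and `final_model.predict(X_test_count[0])` classifies only the first token. Below is a minimal sketch of the usual fix, assuming the same pickled artifacts the script loads (`X_train.pickle`, `final_bayes.pickle`); the placeholder token list is illustrative, not the script's output.

# Sketch only, not the repository's code: join the lemmatized tokens back into
# ONE document so TF-IDF produces a single row for the classifier.
import pickle
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download('stopwords')

lemmatized_output = ['example', 'tweet', 'token']  # stand-in for the script's preprocessing output

X_train = pickle.load(open('X_train.pickle', 'rb'))          # training corpus pickled by the script
final_model = pickle.load(open('final_bayes.pickle', 'rb'))  # classifier pickled by the script

# same stop list the script used, so the vocabulary matches what the model saw
tfidf = TfidfVectorizer(stop_words=list(stopwords.words('english')))
tfidf.fit(X_train)  # refit at inference time, as the script did

user_doc = ' '.join(lemmatized_output)  # tokens -> one document string
prediction = final_model.predict(tfidf.transform([user_doc]))[0]

Pickling the fitted TfidfVectorizer alongside the model, rather than the raw X_train, would also drop the per-request fit and keep inference deterministic.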