Spaces:

Vrooh933
/

CyberBullying_SentimentAnalysis

Runtime error

App Files Files Community

Vrooh933 committed on Dec 2, 2022

Commit

4aaa7da

1 Parent(s): 4c38a42

Create app.py

Browse files

Files changed (1) hide show

app.py +82 -0

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import gradio as gr
+import joblib
+import re
+import string
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.stem import PorterStemmer, WordNetLemmatizer
+import nltk
+from keras.models import load_model
+nltk.download('stopwords')
+stop_words = set(stopwords.words('english'))
+# downloading the additional resources required by nltk
+nltk.download('punkt')
+nltk.download('wordnet')
+nltk.download('omw-1.4')
+# model initiation
+import xgboost
+cv = joblib.load('finalized_model.sav')
+model = joblib.load('BestModels/best_rf.sav')
+def preprocess_text(text):
+    """
+    Runs a set of transformational steps to
+    preprocess the text of the tweet.
+    """
+    # convert all text to lower case
+    text = text.lower()
+    # remove any urls
+    text = re.sub(r'http\S+|www\S+|https\S+', "", text, flags=re.MULTILINE)
+    # replace '****' with 'curse'
+    text = re.sub(r'\*\*\*\*', "gaali", text)
+    # remove punctuations
+    text = text.translate(str.maketrans("", "", string.punctuation))
+    # remove user @ references and hashtags
+    text = re.sub(r'\@\w+|\#', "", text)
+    # remove useless characters
+    text = re.sub(r'[^ -~]', '', text)
+    # remove stopwords
+    tweet_tokens = word_tokenize(text)
+    filtered_words = [word for word in tweet_tokens if word not in stop_words]
+    # stemming
+    ps = PorterStemmer()
+    stemmed_words = [ps.stem(w) for w in filtered_words]
+    # lemmatizing
+    lemmatizer = WordNetLemmatizer()
+    lemma_words = [lemmatizer.lemmatize(w, pos='a') for w in stemmed_words]
+    return ' '.join(lemma_words)
+def sentiment_analysis(text):
+    # print(text)
+    text = cv.transform([preprocess_text(text)])
+    pred_prob = model.predict_proba(text)[0]
+    output = {"not_cyberbullying": float(pred_prob[0]),
+              "gender": float(pred_prob[1]),
+              "religion": float(pred_prob[2]),
+              "age": float(pred_prob[3]),
+              "ethnicity": float(pred_prob[4]),
+              "other_cyberbullying": float(pred_prob[5])}
+    # print(output)
+    return output
+intfc = gr.Interface(
+    fn=sentiment_analysis,
+    inputs=gr.Textbox(label="Input here", lines=2, placeholder="Input your text"),
+    outputs=gr.Label(label="Sentiment Analysis"),
+)
+intfc.launch(share=True)