File size: 4,642 Bytes
f475ccd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import streamlit as st
import sparknlp
import os
import pandas as pd

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

# Page configuration: wide layout, fixed browser-tab title, default sidebar state.
st.set_page_config(
    page_title="Spark NLP Demos App",
    layout="wide",
    initial_sidebar_state="auto",
)

# Custom CSS injected once at the top of the page. It defines the
# "main-title" class and the text colour for paragraphs/lists inside
# ".section" containers.
PAGE_CSS = """

    <style>

        .main-title {

            font-size: 36px;

            color: #4A90E2;

            font-weight: bold;

            text-align: center;

        }

        .section p, .section ul {

            color: #666666;

        }

    </style>

"""
# unsafe_allow_html is required so the <style> tag is emitted as real HTML.
st.markdown(PAGE_CSS, unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

    The ``st.cache_resource`` decorator keeps the result cached, so the
    start-up call is not repeated on every Streamlit rerun.
    """
    spark_session = sparknlp.start()
    return spark_session

@st.cache_resource
def create_pipeline(model):
    """Build the unfitted three-stage classification pipeline.

    Stages, in order:
      1. ``DocumentAssembler``: column "text" -> "document".
      2. Pretrained ``UniversalSentenceEncoder``: "document" ->
         "sentence_embeddings".
      3. ``ClassifierDLModel`` loaded by name: "sentence_embeddings" ->
         "sentiment".

    Args:
        model: Name of the pretrained ClassifierDL model to load.

    Returns:
        A ``pyspark.ml.Pipeline`` holding the three stages. The result is
        cached by ``st.cache_resource``.
    """
    document_stage = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    embedding_stage = (
        UniversalSentenceEncoder.pretrained()
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    classifier_stage = (
        ClassifierDLModel.pretrained(model)
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("sentiment")
    )

    return Pipeline(stages=[document_stage, embedding_stage, classifier_stage])

def fit_data(pipeline, data):
    """Classify ``data`` with ``pipeline`` and return the predicted label.

    The pipeline is fitted on a one-row DataFrame containing an empty
    string in a "text" column, wrapped in a ``LightPipeline``, and applied
    to ``data`` with ``fullAnnotate``.

    Args:
        pipeline: Unfitted pipeline, as returned by ``create_pipeline``.
        data: Text to classify.

    Returns:
        The ``result`` of the first "sentiment" annotation in the first
        ``fullAnnotate`` entry.

    Raises:
        IndexError: If no "sentiment" annotation is produced.
    """
    # Take the session from the cached factory rather than from a
    # module-level ``spark`` global that only exists once the script body
    # has run past its assignment; this keeps the function self-contained.
    session = init_spark()

    # The stages are pretrained, so a placeholder frame is enough to fit on.
    placeholder_df = session.createDataFrame([['']]).toDF('text')
    fitted_pipeline = pipeline.fit(placeholder_df)

    light_pipeline = LightPipeline(fitted_pipeline)
    annotations = light_pipeline.fullAnnotate(data)[0]

    return annotations['sentiment'][0].result

# Page heading, rendered as raw HTML so the "main-title" class applies.
title_html = '<div class="main-title">Detect Cyberbullying in Tweets with Spark NLP</div>'
st.markdown(title_html, unsafe_allow_html=True)

# Sidebar: pretrained-model picker (currently a single option).
sidebar = st.sidebar
available_models = ["classifierdl_use_cyberbullying"]
model = sidebar.selectbox(
    label="Choose the pretrained model",
    options=available_models,
    help="For more info about the models visit: https://sparknlp.org/models",
)

# Sidebar: "Open in Colab" badge linking to the reference notebook.
link = """

<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN_CYBERBULLYING.ipynb">

    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>

</a>

"""
sidebar.markdown('Reference notebook:')
sidebar.markdown(link, unsafe_allow_html=True)

# Sample tweets offered in the "Select a sample" dropdown.
examples = [
  "@CALMicC he kept me informed on stuff id missed and seemed ok. I liked him.",
  "@AMohedin Okay, we have women being physically inferior and the either emotionally or mentally inferior in some way.",
  "@LynnMagic people think that implying association via follow is a bad thing. but it's shockingly accurate.",
  "@Rayandawlah_ @_Jihad10 These days might and honor come from science, technology, humanitarianism. Which is why Muslims won't get any.",
  "Stay outve Congress and we have a deal. @jacobkramer17 Call me sexist bt the super bowl should b guys only no women are allowed n th stadium",
  "I'm looking for a few people to help with @ggautoblocker's twitter. Log &amp; categorize mentions as support requests/abusive/positive tweets.",
  "@geeky_zekey Thanks for showing again that blacks are the biggest racists. Blocked",
  """@ListenToRaisin No question. Feminists have the media. Did you see any mention of Clem Fords OPEN bigotry, etc?  Nope. "Narrative" is all.""",
  "RT @EBeisner @ahall012 I agree with you!! I would rather brush my teeth with sandpaper then watch football with a girl!!",
  "@hibach8 But it is a lie.  The religion is a disgusting, terrorist, hate mongering piece of filth.  That has nothing to do with individuals."
]

st.subheader("Identify Racism, Sexism or Neutral tweets using our pretrained emotions detector.")

selected_text = st.selectbox("Select a sample", examples)
custom_input = st.text_input("Try it for yourself!")

# Free-form input takes priority over the sample picker. Whitespace-only
# input is treated as "nothing entered" so the chosen sample is classified
# instead of a blank string.
if custom_input.strip():
    selected_text = custom_input

st.subheader('Selected Text')
st.write(selected_text)

# Initialize Spark, build the (cached) pipeline and classify the chosen text.
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)

# Display the verdict. The label is lower-cased once so the comparison is
# case-insensitive; the original casing is what gets shown to the user.
label = output.lower()
if label in ('neutral', 'normal'):
    st.markdown("""<h3>This seems like a <span style="color: green">{}</span> tweet. <span style="font-size:35px;">&#128515;</span></h3>""".format(output), unsafe_allow_html=True)
elif label in ('racism', 'sexism'):
    st.markdown("""<h3>This seems like a <span style="color: #B64434">{}</span> tweet. <span style="font-size:35px;">&#129324;</span></h3>""".format(output), unsafe_allow_html=True)
else:
    # Any other label used to render nothing at all; surface it instead so
    # the user is not left with a result-less page.
    st.warning("The model returned an unrecognized label: {}".format(output))