"""Streamlit demo: detect cyberbullying (racism / sexism / neutral) in tweets with Spark NLP."""

import os

import streamlit as st
import sparknlp
import pandas as pd
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

# Page configuration
st.set_page_config(
    layout="wide",
    page_title="Spark NLP Demos App",
    initial_sidebar_state="auto",
)

# CSS for styling
st.markdown(""" """, unsafe_allow_html=True)


@st.cache_resource
def init_spark():
    """Start the Spark NLP session; Streamlit caches the returned object."""
    return sparknlp.start()


@st.cache_resource
def create_pipeline(model):
    """Build the (unfitted) classification pipeline for a pretrained model.

    Args:
        model: Name handed to ``ClassifierDLModel.pretrained``.

    Returns:
        A ``Pipeline`` with three stages: document assembler, Universal
        Sentence Encoder embeddings, and the ClassifierDL model whose output
        column is ``"sentiment"``.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    sentence_encoder = (
        UniversalSentenceEncoder.pretrained()
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    classifier = (
        ClassifierDLModel.pretrained(model)
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("sentiment")
    )

    return Pipeline(stages=[document_assembler, sentence_encoder, classifier])


def fit_data(pipeline, data):
    """Classify a single text and return the predicted label.

    Args:
        pipeline: Pipeline produced by ``create_pipeline``.
        data: Text to classify.

    Returns:
        The ``result`` of the first annotation in the ``"sentiment"`` output.
    """
    # Use the cached session instead of relying on a module-level ``spark``
    # that is only assigned further down the script.
    spark_session = init_spark()

    # The pipeline is fitted on a one-row placeholder frame; presumably this
    # only materialises a PipelineModel since the stages are pretrained.
    placeholder_df = spark_session.createDataFrame([[""]]).toDF("text")
    pipeline_model = pipeline.fit(placeholder_df)

    light_model = LightPipeline(pipeline_model)
    annotations = light_model.fullAnnotate(data)[0]
    return annotations["sentiment"][0].result


# Set up the page layout
st.markdown(
    """
Detect Cyberbullying in Tweets with Spark NLP
""",
    unsafe_allow_html=True,
)

# Sidebar content
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ["classifierdl_use_cyberbullying"],
    help="For more info about the models visit: https://sparknlp.org/models",
)

# Reference notebook link in sidebar
link = """ Open In Colab """
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(link, unsafe_allow_html=True)

# Load examples
examples = [
    "@CALMicC he kept me informed on stuff id missed and seemed ok. I liked him.",
    "@AMohedin Okay, we have women being physically inferior and the either emotionally or mentally inferior in some way.",
    "@LynnMagic people think that implying association via follow is a bad thing. but it's shockingly accurate.",
    "@Rayandawlah_ @_Jihad10 These days might and honor come from science, technology, humanitarianism. Which is why Muslims won't get any.",
    "Stay outve Congress and we have a deal. @jacobkramer17 Call me sexist bt the super bowl should b guys only no women are allowed n th stadium",
    "I'm looking for a few people to help with @ggautoblocker's twitter. Log & categorize mentions as support requests/abusive/positive tweets.",
    "@geeky_zekey Thanks for showing again that blacks are the biggest racists. Blocked",
    """@ListenToRaisin No question. Feminists have the media. Did you see any mention of Clem Fords OPEN bigotry, etc? Nope. "Narrative" is all.""",
    "RT @EBeisner @ahall012 I agree with you!! I would rather brush my teeth with sandpaper then watch football with a girl!!",
    "@hibach8 But it is a lie. The religion is a disgusting, terrorist, hate mongering piece of filth. That has nothing to do with individuals.",
]

st.subheader("Identify Racism, Sexism or Neutral tweets using our pretrained emotions detector.")

selected_text = st.selectbox("Select a sample", examples)
custom_input = st.text_input("Try it for yourself!")

# A non-empty custom input takes precedence over the selected sample.
if custom_input:
    selected_text = custom_input

st.subheader('Selected Text')
st.write(selected_text)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)

# Display output sentence
label = output.lower()
if label in ('neutral', 'normal'):
    st.markdown(
        """

This seems like a {} tweet. 😃

""".format(output),
        unsafe_allow_html=True,
    )
elif label in ('racism', 'sexism'):
    st.markdown(
        """

This seems like a {} tweet. 🤬

""".format(output),
        unsafe_allow_html=True,
    )
else:
    # Surface labels outside the known sets instead of rendering nothing.
    st.warning("Unrecognized model label: {}".format(output))