abdullahmubeen10 committed (verified)
Commit f475ccd · 1 Parent(s): 9c65f75

Upload 9 files

.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
+ [theme]
+ base="light"
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,120 @@
+ import streamlit as st
+ import sparknlp
+ import os
+ import pandas as pd
+
+ from sparknlp.base import *
+ from sparknlp.annotator import *
+ from pyspark.ml import Pipeline
+ from sparknlp.pretrained import PretrainedPipeline
+
+ # Page configuration
+ st.set_page_config(
+     layout="wide",
+     page_title="Spark NLP Demos App",
+     initial_sidebar_state="auto"
+ )
+
+ # CSS for styling
+ st.markdown("""
+     <style>
+         .main-title {
+             font-size: 36px;
+             color: #4A90E2;
+             font-weight: bold;
+             text-align: center;
+         }
+         .section p, .section ul {
+             color: #666666;
+         }
+     </style>
+ """, unsafe_allow_html=True)
+
+ @st.cache_resource
+ def init_spark():
+     return sparknlp.start()
+
+ @st.cache_resource
+ def create_pipeline(model):
+     documentAssembler = DocumentAssembler()\
+         .setInputCol("text")\
+         .setOutputCol("document")
+
+     use = UniversalSentenceEncoder.pretrained()\
+         .setInputCols(["document"])\
+         .setOutputCol("sentence_embeddings")
+
+
+     sentimentdl = ClassifierDLModel.pretrained(model)\
+         .setInputCols(["sentence_embeddings"])\
+         .setOutputCol("sentiment")
+
+     nlpPipeline = Pipeline(stages=[documentAssembler, use, sentimentdl])
+
+     return nlpPipeline
+
+ def fit_data(pipeline, data):
+     empty_df = spark.createDataFrame([['']]).toDF('text')
+     pipeline_model = pipeline.fit(empty_df)
+     model = LightPipeline(pipeline_model)
+     results = model.fullAnnotate(data)[0]
+
+     return results['sentiment'][0].result
+
+ # Set up the page layout
+ st.markdown('<div class="main-title">Detect Cyberbullying in Tweets with Spark NLP</div>', unsafe_allow_html=True)
+
+ # Sidebar content
+ model = st.sidebar.selectbox(
+     "Choose the pretrained model",
+     ["classifierdl_use_cyberbullying"],
+     help="For more info about the models visit: https://sparknlp.org/models"
+ )
+
+ # Reference notebook link in sidebar
+ link = """
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN_CYBERBULLYING.ipynb">
+     <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+ </a>
+ """
+ st.sidebar.markdown('Reference notebook:')
+ st.sidebar.markdown(link, unsafe_allow_html=True)
+
+ # Load examples
+ examples = [
+     "@CALMicC he kept me informed on stuff id missed and seemed ok. I liked him.",
+     "@AMohedin Okay, we have women being physically inferior and the either emotionally or mentally inferior in some way.",
+     "@LynnMagic people think that implying association via follow is a bad thing. but it's shockingly accurate.",
+     "@Rayandawlah_ @_Jihad10 These days might and honor come from science, technology, humanitarianism. Which is why Muslims won't get any.",
+     "Stay outve Congress and we have a deal. @jacobkramer17 Call me sexist bt the super bowl should b guys only no women are allowed n th stadium",
+     "I'm looking for a few people to help with @ggautoblocker's twitter. Log &amp; categorize mentions as support requests/abusive/positive tweets.",
+     "@geeky_zekey Thanks for showing again that blacks are the biggest racists. Blocked",
+     """@ListenToRaisin No question. Feminists have the media. Did you see any mention of Clem Fords OPEN bigotry, etc? Nope. "Narrative" is all.""",
+     "RT @EBeisner @ahall012 I agree with you!! I would rather brush my teeth with sandpaper then watch football with a girl!!",
+     "@hibach8 But it is a lie. The religion is a disgusting, terrorist, hate mongering piece of filth. That has nothing to do with individuals."
+ ]
+
+ st.subheader("Identify racist, sexist, or neutral tweets using our pretrained cyberbullying classifier.")
+
+ selected_text = st.selectbox("Select a sample", examples)
+ custom_input = st.text_input("Try it for yourself!")
+
+ if custom_input:
+     selected_text = custom_input
+ elif selected_text:
+     selected_text = selected_text
+
+ st.subheader('Selected Text')
+ st.write(selected_text)
+
+ # Initialize Spark and create pipeline
+ spark = init_spark()
+ pipeline = create_pipeline(model)
+ output = fit_data(pipeline, selected_text)
+
+ # Display output sentence
+ if output.lower() in ['neutral', 'normal']:
+     st.markdown("""<h3>This seems like a <span style="color: green">{}</span> tweet. <span style="font-size:35px;">&#128515;</span></h3>""".format(output), unsafe_allow_html=True)
+ elif output.lower() in ['racism', 'sexism']:
+     st.markdown("""<h3>This seems like a <span style="color: #B64434">{}</span> tweet. <span style="font-size:35px;">&#129324;</span></h3>""".format(output), unsafe_allow_html=True)
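Note: to try the app outside the container, a minimal local run could look like the commands below (an assumption for illustration: a working Python environment plus Java 8, which Spark NLP needs and which the Dockerfile installs via openjdk-8-jdk):

    # install the listed dependencies, then launch the Streamlit app
    pip install -r requirements.txt
    streamlit run Demo.py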
Dockerfile ADDED
@@ -0,0 +1,70 @@
+ # Download base image ubuntu 18.04
+ FROM ubuntu:18.04
+
+ # Set environment variables
+ ENV NB_USER jovyan
+ ENV NB_UID 1000
+ ENV HOME /home/${NB_USER}
+
+ # Install required packages
+ RUN apt-get update && apt-get install -y \
+     tar \
+     wget \
+     bash \
+     rsync \
+     gcc \
+     libfreetype6-dev \
+     libhdf5-serial-dev \
+     libpng-dev \
+     libzmq3-dev \
+     python3 \
+     python3-dev \
+     python3-pip \
+     unzip \
+     pkg-config \
+     software-properties-common \
+     graphviz \
+     openjdk-8-jdk \
+     ant \
+     ca-certificates-java \
+     && apt-get clean \
+     && update-ca-certificates -f;
+
+ # Install Python 3.8 and pip
+ RUN add-apt-repository ppa:deadsnakes/ppa \
+     && apt-get update \
+     && apt-get install -y python3.8 python3-pip \
+     && apt-get clean;
+
+ # Set up JAVA_HOME
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+ RUN mkdir -p ${HOME} \
+     && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
+     && chown -R ${NB_UID}:${NB_UID} ${HOME}
+
+ # Create a new user named "jovyan" with user ID 1000
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
+
+ # Switch to the "jovyan" user
+ USER ${NB_USER}
+
+ # Set home and path variables for the user
+ ENV HOME=/home/${NB_USER} \
+     PATH=/home/${NB_USER}/.local/bin:$PATH
+
+ # Set the working directory to the user's home directory
+ WORKDIR ${HOME}
+
+ # Upgrade pip and install Python dependencies
+ RUN python3.8 -m pip install --upgrade pip
+ COPY requirements.txt /tmp/requirements.txt
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
+
+ # Copy the application code into the container at /home/jovyan
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+
+ # Expose port for Streamlit
+ EXPOSE 7860
+
+ # Define the entry point for the container
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
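With this Dockerfile, building and starting the container could look like the commands below (the image tag spark-nlp-cyberbullying-demo is an arbitrary example; port 7860 matches the EXPOSE and ENTRYPOINT settings above):

    # build the image from the repository root, then run it and publish the Streamlit port
    docker build -t spark-nlp-cyberbullying-demo .
    docker run -p 7860:7860 spark-nlp-cyberbullying-demo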
images/Cyberbullying.jpeg ADDED
images/Sentiment-Analysis.jpg ADDED
images/dataset.png ADDED
images/johnsnowlabs-sentiment-output.png ADDED
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,176 @@
+ import streamlit as st
+
+ # Custom CSS for better styling
+ st.markdown("""
+     <style>
+         .main-title {
+             font-size: 36px;
+             color: #4A90E2;
+             font-weight: bold;
+             text-align: center;
+         }
+         .sub-title {
+             font-size: 24px;
+             color: #4A90E2;
+             margin-top: 20px;
+         }
+         .section {
+             background-color: #f9f9f9;
+             padding: 15px;
+             border-radius: 10px;
+             margin-top: 20px;
+         }
+         .section h2 {
+             font-size: 22px;
+             color: #4A90E2;
+         }
+         .section p, .section ul {
+             color: #666666;
+         }
+         .link {
+             color: #4A90E2;
+             text-decoration: none;
+         }
+     </style>
+ """, unsafe_allow_html=True)
+
+ # Introduction
+ st.markdown('<div class="main-title">Cyberbullying Detection in Tweets with Spark NLP</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <p>Welcome to the Spark NLP Cyberbullying Detection Demo App! Detecting cyberbullying in social media posts is crucial to creating a safer online environment. This app demonstrates how to use Spark NLP's powerful tools to identify and classify cyberbullying in tweets.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ st.write("")
+ st.image('images/Cyberbullying.jpeg', use_column_width='auto')
+
+ # About Cyberbullying Detection
+ st.markdown('<div class="sub-title">About Cyberbullying Detection</div>', unsafe_allow_html=True)
+ st.markdown("""
+ <div class="section">
+     <p>Cyberbullying detection involves analyzing text to identify instances of harmful, threatening, or abusive language. Cyberbullying can have severe psychological effects on victims, making it essential to identify and address it promptly. Using Spark NLP, we can build a model to detect and classify cyberbullying in social media posts, helping to mitigate the negative impacts of online harassment.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Using the Cyberbullying Detection Model in Spark NLP
+ st.markdown('<div class="sub-title">Using the Cyberbullying Detection Model in Spark NLP</div>', unsafe_allow_html=True)
+ st.markdown("""
+ <div class="section">
+     <p>The following pipeline uses the Universal Sentence Encoder and a pre-trained ClassifierDL model to detect cyberbullying in tweets. This model can identify various forms of cyberbullying, such as racism and sexism.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ st.markdown('<h2 class="sub-title">Example Usage in Python</h2>', unsafe_allow_html=True)
+
+ # Setup Instructions
+ st.markdown('<div class="sub-title">Setup</div>', unsafe_allow_html=True)
+ st.markdown('<p>To install Spark NLP in Python, use your favorite package manager (conda, pip, etc.). For example:</p>', unsafe_allow_html=True)
+ st.code("""
+ pip install spark-nlp
+ pip install pyspark
+ """, language="bash")
+
+ st.markdown("<p>Then, import Spark NLP and start a Spark session:</p>", unsafe_allow_html=True)
+ st.code("""
+ import sparknlp
+
+ # Start Spark Session
+ spark = sparknlp.start()
+ """, language='python')
+
+ # Cyberbullying Detection Example
+ st.markdown('<div class="sub-title">Example Usage: Cyberbullying Detection with Spark NLP</div>', unsafe_allow_html=True)
+ st.code('''
+ from sparknlp.base import DocumentAssembler, LightPipeline
+ from sparknlp.annotator import UniversalSentenceEncoder, ClassifierDLModel
+ from pyspark.ml import Pipeline
+
+ # Step 1: Transform raw text into document annotations
+ document_assembler = DocumentAssembler()\\
+     .setInputCol("text")\\
+     .setOutputCol("document")
+
+ # Step 2: Universal Sentence Encoder
+ use = UniversalSentenceEncoder.pretrained('tfhub_use', lang="en") \\
+     .setInputCols(["document"])\\
+     .setOutputCol("sentence_embeddings")
+
+ # Step 3: ClassifierDLModel for Cyberbullying Detection
+ document_classifier = ClassifierDLModel.pretrained('classifierdl_use_cyberbullying', 'en') \\
+     .setInputCols(["sentence_embeddings"]) \\
+     .setOutputCol("class")
+
+ # Define the pipeline
+ nlp_pipeline = Pipeline(stages=[document_assembler, use, document_classifier])
+
+ # Create a light pipeline for prediction
+ light_pipeline = LightPipeline(nlp_pipeline.fit(spark.createDataFrame([['']]).toDF("text")))
+
+ # Predict cyberbullying in a tweet
+ annotations = light_pipeline.fullAnnotate('@geeky_zekey Thanks for showing again that blacks are the biggest racists. Blocked')
+ print(annotations[0]['class'][0])
+ ''', language='python')
+
+ st.text("""
+ Output:
+ Annotation(category, 0, 81, racism, {'sentence': '0', 'sexism': '2.4904006E-7', 'neutral': '9.4820876E-5', 'racism': '0.9999049'}, [])
+ """)
+
+ st.markdown("""
+ <p>The annotation classifies the text as "racism" with a probability score of 0.9999049, indicating very high confidence, while also providing low probability scores for "sexism" and "neutral."</p>
+ """, unsafe_allow_html=True)
+
+ # Benchmarking Section
+ st.markdown('<div class="sub-title">Benchmarking</div>', unsafe_allow_html=True)
+ st.markdown("""
+ <div class="section">
+     <p>The following table summarizes the performance of the Cyberbullying Detection model in terms of precision, recall, and f1-score:</p>
+     <pre>
+               precision    recall  f1-score   support
+
+      neutral       0.72      0.76      0.74       700
+       racism       0.89      0.94      0.92       773
+       sexism       0.82      0.71      0.76       622
+
+     accuracy                           0.81      2095
+    macro avg       0.81      0.80      0.80      2095
+ weighted avg       0.81      0.81      0.81      2095
+     </pre>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # Conclusion
+ st.markdown("""
+ <div class="section">
+     <h2>Conclusion</h2>
+     <p>In this app, we demonstrated how to use Spark NLP's ClassifierDL model to perform cyberbullying detection on tweet data. These powerful tools enable users to efficiently process large datasets and identify harmful content, providing deeper insights for various applications. By integrating these annotators into your NLP pipelines, you can enhance text understanding, information extraction, and online safety measures.</p>
+ </div>
+ """, unsafe_allow_html=True)
+
+ # References and Additional Information
+ st.markdown('<div class="sub-title">For additional information, please check the following references.</div>', unsafe_allow_html=True)
+
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li>Documentation: <a href="https://nlp.johnsnowlabs.com/docs/en/transformers#classifierdl" target="_blank" rel="noopener">ClassifierDLModel</a></li>
+         <li>Python Docs: <a href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp/annotator/classifierdl/index.html#sparknlp.annotator.classifierdl.ClassifierDLModel" target="_blank" rel="noopener">ClassifierDLModel</a></li>
+         <li>Model Used: <a href="https://sparknlp.org/2021/01/09/classifierdl_use_cyberbullying_en.html" target="_blank" rel="noopener">classifierdl_use_cyberbullying</a></li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
+
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
+ st.markdown("""
+ <div class="section">
+     <ul>
+         <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
+         <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
+         <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
+         <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
+         <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
+     </ul>
+ </div>
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit
+ pandas
+ numpy
+ spark-nlp
+ pyspark