Spaces:

spark-nlp
/

detect-sarcasm

Sleeping

App Files Files Community

abdullahmubeen10 committed on Jul 15, 2024

Commit

a3eaa4a

verified ·

1 Parent(s): f8354fd

Upload 6 files

Browse files

Files changed (6) hide show

.streamlit/config.toml +3 -0
Demo.py +119 -0
Dockerfile +70 -0
images/sarcasm.jpg +0 -0
pages/Workflow & Model Overview.py +179 -0
requirements.txt +5 -0

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,3 @@

+[theme]
+base="light"
+primaryColor="#29B4E8"

Demo.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import streamlit as st
+import sparknlp
+import os
+import pandas as pd
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+from sparknlp.pretrained import PretrainedPipeline
+# Page configuration: wide layout, page title, and automatic sidebar state
+st.set_page_config(
+    layout="wide",
+    page_title="Spark NLP Demos App",
+    initial_sidebar_state="auto"
+)
+# CSS for styling: defines the .main-title heading class and the text color
+# used for paragraphs and lists inside .section containers
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+    </style>
+""", unsafe_allow_html=True)
+@st.cache_resource
+def init_spark():
+    return sparknlp.start()
+@st.cache_resource
+def create_pipeline(model):
+  documentAssembler = DocumentAssembler()\
+  .setInputCol("text")\
+  .setOutputCol("document")
+  use = UniversalSentenceEncoder.pretrained()\
+  .setInputCols(["document"])\
+  .setOutputCol("sentence_embeddings")
+  sentimentdl = ClassifierDLModel.pretrained(model)\
+    .setInputCols(["sentence_embeddings"])\
+    .setOutputCol("sentiment")
+  nlpPipeline = Pipeline(stages = [documentAssembler, use, sentimentdl])
+  return nlpPipeline
+def fit_data(pipeline, data):
+    empty_df = spark.createDataFrame([['']]).toDF('text')
+    pipeline_model = pipeline.fit(empty_df)
+    model = LightPipeline(pipeline_model)
+    results = model.fullAnnotate(data)[0]
+    return results['sentiment'][0].result
+# Set up the page layout: main heading styled with the .main-title class
+st.markdown('<div class="main-title">Detect Sarcastic Tweets with Spark NLP</div>', unsafe_allow_html=True)
+# Sidebar content: model picker (currently a single pretrained model is offered)
+model = st.sidebar.selectbox(
+    "Choose the pretrained model",
+    ["classifierdl_use_sarcasm"],
+    help="For more info about the models visit: https://sparknlp.org/models"
+)
+# Reference notebook link in sidebar (Colab badge wrapping a link to the notebook)
+link = """
+<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN_SARCASM.ipynb">
+    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+</a>
+"""
+st.sidebar.markdown('Reference notebook:')
+st.sidebar.markdown(link, unsafe_allow_html=True)
+# Sample texts offered in the "Select a sample" dropdown
+examples = [
+  "Love getting home from work knowing that in less than 8hours you're getting up to go back there again.",
+  "Oh my gosh! Can you imagine @JessieJ playing piano on her tour while singing a song. I would die and go to heaven. #sheisanangel",
+  "Dear Teva, thank you for waking me up every few hours by howling. Your just trying to be mother natures alarm clock.",
+  "The United States is a signatory to this international convention",
+  "If I could put into words how much I love waking up at am on Tuesdays I would",
+  "@pdomo Don't forget that Nick Foles is also the new Tom Brady. What a preseason! #toomanystudQBs #thankgodwedonthavetebow",
+  "I cant even describe how excited I am to go cook noodles for hours",
+  "@Will_Piper should move back up fella. I'm already here... On my own... Having loads of fun",
+  "Tweeting at work... Having sooooo much fun and honestly not bored at all #countdowntillfinish",
+  "I can do what I want to. I play by my own rules"
+]
+selected_text = st.selectbox("Select a sample", examples)
+custom_input = st.text_input("Try it for yourself!")
+if custom_input:
+    selected_text = custom_input
+elif selected_text:
+    selected_text = selected_text
+st.subheader('Selected Text')
+st.write(selected_text)
+st.subheader('Selected Text')
+st.write(selected_text)
+# Initialize Spark and create pipeline
+spark = init_spark()
+pipeline = create_pipeline(model)
+output = fit_data(pipeline, selected_text)
+# Display output sentence
+if output in ['neutral', 'normal']:
+  st.markdown("""<h3>This seems like <span style="color: #209DDC">{}</span> news. <span style="font-size:35px;">&#128578;</span></h3>""".format(output), unsafe_allow_html=True)
+elif output == 'sarcasm':
+  st.markdown("""<h3>This seems like a <span style="color: #B64434">{}</span> tweet. <span style="font-size:35px;">&#128579;</span></h3>""".format('sarcastic'), unsafe_allow_html=True)

Dockerfile ADDED Viewed

	@@ -0,0 +1,70 @@

+# Download base image ubuntu 18.04
+FROM ubuntu:18.04
+# Set environment variables: the user name and UID are reused by the
+# useradd, chown and COPY steps below
+ENV NB_USER jovyan
+ENV NB_UID 1000
+ENV HOME /home/${NB_USER}
+# Install required packages (build tools, Python 3, OpenJDK 8 and CA certificates)
+RUN apt-get update && apt-get install -y \
+    tar \
+    wget \
+    bash \
+    rsync \
+    gcc \
+    libfreetype6-dev \
+    libhdf5-serial-dev \
+    libpng-dev \
+    libzmq3-dev \
+    python3 \
+    python3-dev \
+    python3-pip \
+    unzip \
+    pkg-config \
+    software-properties-common \
+    graphviz \
+    openjdk-8-jdk \
+    ant \
+    ca-certificates-java \
+    && apt-get clean \
+    && update-ca-certificates -f;
+# Install Python 3.8 and pip (python3.8 comes from the deadsnakes PPA)
+RUN add-apt-repository ppa:deadsnakes/ppa \
+    && apt-get update \
+    && apt-get install -y python3.8 python3-pip \
+    && apt-get clean;
+# Set up JAVA_HOME
+ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
+# Create the home directory, record JAVA_HOME in .bashrc, and hand ownership
+# to the numeric UID (the named user is only created in the next step)
+RUN mkdir -p ${HOME} \
+    && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
+    && chown -R ${NB_UID}:${NB_UID} ${HOME}
+# Create a new user named "jovyan" with user ID 1000
+RUN useradd -m -u ${NB_UID} ${NB_USER}
+# Switch to the "jovyan" user
+USER ${NB_USER}
+# Set home and path variables for the user; ~/.local/bin is put on PATH
+# NOTE(review): presumably because pip, run as this non-root user, installs
+# console scripts such as `streamlit` there - confirm
+ENV HOME=/home/${NB_USER} \
+    PATH=/home/${NB_USER}/.local/bin:$PATH
+# Set the working directory to the user's home directory
+WORKDIR ${HOME}
+# Upgrade pip and install Python dependencies
+# NOTE(review): requirements.txt is copied on its own before the app code,
+# presumably so the dependency layer can be cached - confirm
+RUN python3.8 -m pip install --upgrade pip
+COPY requirements.txt /tmp/requirements.txt
+RUN python3.8 -m pip install -r /tmp/requirements.txt
+# Copy the application code into the container at /home/jovyan
+COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
+# Expose port for Streamlit (same port as --server.port in ENTRYPOINT)
+EXPOSE 7860
+# Define the entry point for the container: run Demo.py, listening on all interfaces
+ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]

images/sarcasm.jpg ADDED Viewed

pages/Workflow & Model Overview.py ADDED Viewed

	@@ -0,0 +1,179 @@

+import streamlit as st
+# Custom CSS for better styling: classes used by the HTML snippets on this page
+# (.main-title, .sub-title, .section and .link)
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .sub-title {
+            font-size: 24px;
+            color: #4A90E2;
+            margin-top: 20px;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 15px;
+            border-radius: 10px;
+            margin-top: 20px;
+        }
+        .section h2 {
+            font-size: 22px;
+            color: #4A90E2;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+        .link {
+            color: #4A90E2;
+            text-decoration: none;
+        }
+    </style>
+""", unsafe_allow_html=True)
+# Introduction: page heading followed by a welcome section
+st.markdown('<div class="main-title">Detecting Sarcasm with Spark NLP</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>Welcome to the Spark NLP Sarcasm Detection Demo App! Detecting sarcasm in text is crucial for understanding sentiment and context. This app utilizes advanced natural language processing techniques to identify instances of sarcasm with high accuracy.</p>
+    <p>This demo showcases the use of Spark NLP's ClassifierDLModel pretrained on Universal Sentence Encoder embeddings to classify text as sarcastic or normal.</p>
+</div>
+""", unsafe_allow_html=True)
+# Illustration; the path is relative (images/sarcasm.jpg)
+st.image('images/sarcasm.jpg', use_column_width='auto')
+# About Sarcasm Detection
+st.markdown('<div class="sub-title">About Sarcasm Detection</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>Sarcasm detection involves identifying language that is contrary to the literal meaning, often used to convey humor or irony. It plays a crucial role in sentiment analysis and understanding textual context.</p>
+    <p>Effective sarcasm detection models improve the accuracy of sentiment analysis and help in better understanding user intent.</p>
+</div>
+""", unsafe_allow_html=True)
+# Using ClassifierDLModel in Spark NLP
+st.markdown('<div class="sub-title">Using ClassifierDLModel in Spark NLP</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>The ClassifierDLModel in Spark NLP utilizes deep learning techniques to classify text into predefined categories, in this case, detecting sarcasm or normal text. It is trained on Universal Sentence Encoder embeddings for robust performance.</p>
+    <p>For more details, refer to the <a class="link" href="https://sparknlp.org/docs/en/annotators#classifierdl" target="_blank" rel="noopener">ClassifierDLModel documentation</a> on Spark NLP's official website.</p>
+</div>
+""", unsafe_allow_html=True)
+# Heading and lead-in paragraph for the code walkthrough that follows
+st.markdown('<h2 class="sub-title">Example Usage in Python</h2>', unsafe_allow_html=True)
+st.markdown('<p>Here’s how you can implement sarcasm detection using the ClassifierDLModel in Spark NLP:</p>', unsafe_allow_html=True)
+# Setup Instructions: install commands, then session start-up code
+st.markdown('<div class="sub-title">Setup</div>', unsafe_allow_html=True)
+st.markdown('<p>To use Spark NLP for sarcasm detection, follow these setup instructions:</p>', unsafe_allow_html=True)
+st.code("""
+pip install spark-nlp
+pip install pyspark
+""", language="bash")
+st.markdown("<p>Then, import Spark NLP and start a Spark session:</p>", unsafe_allow_html=True)
+st.code("""
+import sparknlp
+# Start Spark Session
+spark = sparknlp.start()
+""", language='python')
+# Sentiment Analysis Example
+st.markdown('<div class="sub-title">Example Usage: Sarcasm Detection with ClassifierDLModel</div>', unsafe_allow_html=True)
+st.code('''
+from sparknlp.base import DocumentAssembler
+from sparknlp.annotator import UniversalSentenceEncoder, ClassifierDLModel
+from pyspark.ml import Pipeline
+# Step 1: DocumentAssembler
+document_assembler = DocumentAssembler() \\
+    .setInputCol("text") \\
+    .setOutputCol("document")
+# Step 2: UniversalSentenceEncoder
+use = UniversalSentenceEncoder.pretrained() \\
+    .setInputCols(["document"]) \\
+    .setOutputCol("sentence_embeddings")
+# Step 3: ClassifierDLModel for Sarcasm Detection
+sentimentdl = ClassifierDLModel.pretrained('classifierdl_use_sarcasm') \\
+    .setInputCols(["sentence_embeddings"]) \\
+    .setOutputCol("sentiment")
+# Define the NLP Pipeline
+nlpPipeline = Pipeline(stages=[document_assembler, use, sentimentdl])
+# Example Text
+text = "Oh, great! Another meeting scheduled for Friday afternoon. That's just what I needed."
+# Process the text through the pipeline
+result = nlpPipeline.fit(spark.createDataFrame([[text]]).toDF("text")).transform(spark.createDataFrame([[text]]).toDF("text")).select('text', 'sentiment.result').show(truncate=False)
+''', language='python')
+st.text("""
++-------------------------------------------------------------------------------------+---------+
+|text                                                                                 |result   |
++-------------------------------------------------------------------------------------+---------+
+|Oh, great! Another meeting scheduled for Friday afternoon. That's just what I needed.|[sarcasm]|
++-------------------------------------------------------------------------------------+---------+
+""")
+st.markdown("""
+<p>The above example demonstrates how to use Spark NLP's ClassifierDLModel to detect sarcasm in text using Universal Sentence Encoder embeddings.</p>
+""", unsafe_allow_html=True)
+# Benchmarking
+st.markdown('<div class="sub-title">Benchmarking</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <p>Performance metrics of the sarcasm detection model:</p>
+    <pre>
+    precision    recall  f1-score   support
+    normal       0.98      0.89      0.93       495
+    sarcasm      0.60      0.91      0.73        93
+    accuracy                           0.89       588
+    macro avg      0.79      0.90      0.83       588
+    weighted avg   0.92      0.89      0.90       588
+</div>
+""", unsafe_allow_html=True)
+# Conclusion
+st.markdown("""
+<div class="section">
+    <h2>Conclusion</h2>
+    <p>In this app, we explored how Spark NLP's ClassifierDLModel can be used to detect sarcasm in text. This capability enhances sentiment analysis and contextual understanding in various applications, improving the accuracy of natural language processing tasks.</p>
+</div>
+""", unsafe_allow_html=True)
+# References and Additional Information: documentation, API docs and model card links
+st.markdown('<div class="sub-title">For additional information, please check the following references.</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+        <ul>
+            <li>Documentation :&nbsp;<a class="link" href="https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdlmodel" target="_blank" rel="noopener">ClassifierDLModel</a></li>
+            <li>Python Docs :&nbsp;<a class="link" href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp.annotator.ClassifierDLModel.html" target="_blank" rel="noopener">ClassifierDLModel</a></li>
+            <li>Model Used :&nbsp;<a class="link" href="https://sparknlp.org/2021/01/09/classifierdl_use_sarcasm_en.html" target="_blank" rel="noopener">classifierdl_use_sarcasm</a></li>
+        </ul>
+    </div>
+""", unsafe_allow_html=True)
+# Community & Support: links to the project's website, chat, source and media channels
+st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
+st.markdown("""
+<div class="section">
+    <ul>
+        <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
+        <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
+        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
+        <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
+        <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
+    </ul>
+</div>
+""", unsafe_allow_html=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+streamlit
+pandas
+numpy
+spark-nlp
+pyspark