abdullahmubeen10 committed on
Commit
a3eaa4a
·
verified ·
1 Parent(s): f8354fd

Upload 6 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+
6
+ from sparknlp.base import *
7
+ from sparknlp.annotator import *
8
+ from pyspark.ml import Pipeline
9
+ from sparknlp.pretrained import PretrainedPipeline
10
+
11
+ # Page configuration
12
+ st.set_page_config(
13
+ layout="wide",
14
+ page_title="Spark NLP Demos App",
15
+ initial_sidebar_state="auto"
16
+ )
17
+
18
+ # CSS for styling
19
+ st.markdown("""
20
+ <style>
21
+ .main-title {
22
+ font-size: 36px;
23
+ color: #4A90E2;
24
+ font-weight: bold;
25
+ text-align: center;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ </style>
31
+ """, unsafe_allow_html=True)
32
+
33
@st.cache_resource
def init_spark():
    """Start Spark NLP and return the session.

    Wrapped in ``st.cache_resource`` so the result of ``sparknlp.start()``
    is reused instead of being recreated on each call.
    """
    session = sparknlp.start()
    return session
36
+
37
@st.cache_resource
def create_pipeline(model):
    """Assemble the unfitted classification pipeline for ``model``.

    Stages, in order:
      1. ``DocumentAssembler``: "text" -> "document"
      2. ``UniversalSentenceEncoder.pretrained()``: "document" -> "sentence_embeddings"
      3. ``ClassifierDLModel.pretrained(model)``: "sentence_embeddings" -> "sentiment"

    Args:
        model: Name handed to ``ClassifierDLModel.pretrained``.

    Returns:
        A ``pyspark.ml.Pipeline`` built from the three stages (not yet fitted).
    """
    assembler_stage = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    encoder_stage = (
        UniversalSentenceEncoder.pretrained()
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    classifier_stage = (
        ClassifierDLModel.pretrained(model)
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("sentiment")
    )

    return Pipeline(stages=[assembler_stage, encoder_stage, classifier_stage])
54
+
55
def fit_data(pipeline, data):
    """Run ``data`` through ``pipeline`` and return the predicted label.

    The pipeline is fitted on a one-row DataFrame whose "text" value is an
    empty string, wrapped in a ``LightPipeline``, and applied to ``data``
    with ``fullAnnotate``.

    Args:
        pipeline: The pipeline to fit (as returned by ``create_pipeline``).
        data: The text passed to ``fullAnnotate``.

    Returns:
        The ``result`` attribute of the first "sentiment" annotation in the
        first ``fullAnnotate`` result.
    """
    # Uses the module-level ``spark`` session, which must be assigned before
    # this function is called.
    placeholder_df = spark.createDataFrame([['']]).toDF('text')
    light_model = LightPipeline(pipeline.fit(placeholder_df))

    first_result = light_model.fullAnnotate(data)[0]
    top_annotation = first_result['sentiment'][0]
    return top_annotation.result
62
+
63
+ # Set up the page layout
64
+ st.markdown('<div class="main-title">Detect Sarcastic Tweets with Spark NLP</div>', unsafe_allow_html=True)
65
+
66
+ # Sidebar content
67
+ model = st.sidebar.selectbox(
68
+ "Choose the pretrained model",
69
+ ["classifierdl_use_sarcasm"],
70
+ help="For more info about the models visit: https://sparknlp.org/models"
71
+ )
72
+
73
+ # Reference notebook link in sidebar
74
+ link = """
75
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN_SARCASM.ipynb">
76
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
77
+ </a>
78
+ """
79
+ st.sidebar.markdown('Reference notebook:')
80
+ st.sidebar.markdown(link, unsafe_allow_html=True)
81
+
82
+ # Load examples
83
+ examples = [
84
+ "Love getting home from work knowing that in less than 8hours you're getting up to go back there again.",
85
+ "Oh my gosh! Can you imagine @JessieJ playing piano on her tour while singing a song. I would die and go to heaven. #sheisanangel",
86
+ "Dear Teva, thank you for waking me up every few hours by howling. Your just trying to be mother natures alarm clock.",
87
+ "The United States is a signatory to this international convention",
88
+ "If I could put into words how much I love waking up at am on Tuesdays I would",
89
+ "@pdomo Don't forget that Nick Foles is also the new Tom Brady. What a preseason! #toomanystudQBs #thankgodwedonthavetebow",
90
+ "I cant even describe how excited I am to go cook noodles for hours",
91
+ "@Will_Piper should move back up fella. I'm already here... On my own... Having loads of fun",
92
+ "Tweeting at work... Having sooooo much fun and honestly not bored at all #countdowntillfinish",
93
+ "I can do what I want to. I play by my own rules"
94
+ ]
95
+
96
# Let the user pick a bundled sample or type their own text.
selected_text = st.selectbox("Select a sample", examples)
custom_input = st.text_input("Try it for yourself!")

# Typed text takes precedence over the sample; otherwise the selectbox
# value is kept unchanged.
if custom_input:
    selected_text = custom_input

# Echo the text that will be classified (shown once).
st.subheader('Selected Text')
st.write(selected_text)
109
+
110
+ # Initialize Spark and create pipeline
111
+ spark = init_spark()
112
+ pipeline = create_pipeline(model)
113
+ output = fit_data(pipeline, selected_text)
114
+
115
+ # Display output sentence
116
# Render the verdict for the predicted label held in ``output``.
if output in ('neutral', 'normal'):
    # Non-sarcastic labels are shown verbatim in blue.
    st.markdown(
        """<h3>This seems like a <span style="color: #209DDC">{}</span> tweet. <span style="font-size:35px;">&#128578;</span></h3>""".format(output),
        unsafe_allow_html=True,
    )
elif output == 'sarcasm':
    # The 'sarcasm' label is worded as the adjective "sarcastic" in red.
    st.markdown(
        """<h3>This seems like a <span style="color: #B64434">{}</span> tweet. <span style="font-size:35px;">&#128579;</span></h3>""".format('sarcastic'),
        unsafe_allow_html=True,
    )
else:
    # Avoid a silently empty result area when the label is not one this
    # page knows how to present.
    st.warning("Unexpected model output: {!r}".format(output))
Dockerfile ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Download base image ubuntu 18.04
2
+ FROM ubuntu:18.04
3
+
4
+ # Set environment variables
5
+ ENV NB_USER jovyan
6
+ ENV NB_UID 1000
7
+ ENV HOME /home/${NB_USER}
8
+
9
+ # Install required packages
10
+ RUN apt-get update && apt-get install -y \
11
+ tar \
12
+ wget \
13
+ bash \
14
+ rsync \
15
+ gcc \
16
+ libfreetype6-dev \
17
+ libhdf5-serial-dev \
18
+ libpng-dev \
19
+ libzmq3-dev \
20
+ python3 \
21
+ python3-dev \
22
+ python3-pip \
23
+ unzip \
24
+ pkg-config \
25
+ software-properties-common \
26
+ graphviz \
27
+ openjdk-8-jdk \
28
+ ant \
29
+ ca-certificates-java \
30
+ && apt-get clean \
31
+ && update-ca-certificates -f;
32
+
33
+ # Install Python 3.8 and pip
34
+ RUN add-apt-repository ppa:deadsnakes/ppa \
35
+ && apt-get update \
36
+ && apt-get install -y python3.8 python3-pip \
37
+ && apt-get clean;
38
+
39
+ # Set up JAVA_HOME
40
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
41
+ RUN mkdir -p ${HOME} \
42
+ && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
43
+ && chown -R ${NB_UID}:${NB_UID} ${HOME}
44
+
45
+ # Create a new user named "jovyan" with user ID 1000
46
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
47
+
48
+ # Switch to the "jovyan" user
49
+ USER ${NB_USER}
50
+
51
+ # Set home and path variables for the user
52
+ ENV HOME=/home/${NB_USER} \
53
+ PATH=/home/${NB_USER}/.local/bin:$PATH
54
+
55
+ # Set the working directory to the user's home directory
56
+ WORKDIR ${HOME}
57
+
58
+ # Upgrade pip and install Python dependencies
59
+ RUN python3.8 -m pip install --upgrade pip
60
+ COPY requirements.txt /tmp/requirements.txt
61
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
62
+
63
+ # Copy the application code into the container at /home/jovyan
64
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
65
+
66
+ # Expose port for Streamlit
67
+ EXPOSE 7860
68
+
69
+ # Define the entry point for the container
70
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
images/sarcasm.jpg ADDED
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ # Custom CSS for better styling
4
+ st.markdown("""
5
+ <style>
6
+ .main-title {
7
+ font-size: 36px;
8
+ color: #4A90E2;
9
+ font-weight: bold;
10
+ text-align: center;
11
+ }
12
+ .sub-title {
13
+ font-size: 24px;
14
+ color: #4A90E2;
15
+ margin-top: 20px;
16
+ }
17
+ .section {
18
+ background-color: #f9f9f9;
19
+ padding: 15px;
20
+ border-radius: 10px;
21
+ margin-top: 20px;
22
+ }
23
+ .section h2 {
24
+ font-size: 22px;
25
+ color: #4A90E2;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ .link {
31
+ color: #4A90E2;
32
+ text-decoration: none;
33
+ }
34
+ </style>
35
+ """, unsafe_allow_html=True)
36
+
37
+ # Introduction
38
+ st.markdown('<div class="main-title">Detecting Sarcasm with Spark NLP</div>', unsafe_allow_html=True)
39
+
40
+ st.markdown("""
41
+ <div class="section">
42
+ <p>Welcome to the Spark NLP Sarcasm Detection Demo App! Detecting sarcasm in text is crucial for understanding sentiment and context. This app utilizes advanced natural language processing techniques to identify instances of sarcasm with high accuracy.</p>
43
+ <p>This demo showcases the use of Spark NLP's ClassifierDLModel pretrained on Universal Sentence Encoder embeddings to classify text as sarcastic or normal.</p>
44
+ </div>
45
+ """, unsafe_allow_html=True)
46
+
47
+ st.image('images/sarcasm.jpg', use_column_width='auto')
48
+
49
+ # About Sarcasm Detection
50
+ st.markdown('<div class="sub-title">About Sarcasm Detection</div>', unsafe_allow_html=True)
51
+ st.markdown("""
52
+ <div class="section">
53
+ <p>Sarcasm detection involves identifying language that is contrary to the literal meaning, often used to convey humor or irony. It plays a crucial role in sentiment analysis and understanding textual context.</p>
54
+ <p>Effective sarcasm detection models improve the accuracy of sentiment analysis and help in better understanding user intent.</p>
55
+ </div>
56
+ """, unsafe_allow_html=True)
57
+
58
+ # Using ClassifierDLModel in Spark NLP
59
+ st.markdown('<div class="sub-title">Using ClassifierDLModel in Spark NLP</div>', unsafe_allow_html=True)
60
+ st.markdown("""
61
+ <div class="section">
62
+ <p>The ClassifierDLModel in Spark NLP utilizes deep learning techniques to classify text into predefined categories, in this case, detecting sarcasm or normal text. It is trained on Universal Sentence Encoder embeddings for robust performance.</p>
63
+ <p>For more details, refer to the <a class="link" href="https://sparknlp.org/docs/en/annotators#classifierdl" target="_blank" rel="noopener">ClassifierDLModel documentation</a> on Spark NLP's official website.</p>
64
+ </div>
65
+ """, unsafe_allow_html=True)
66
+
67
+ st.markdown('<h2 class="sub-title">Example Usage in Python</h2>', unsafe_allow_html=True)
68
+ st.markdown('<p>Here’s how you can implement sarcasm detection using the ClassifierDLModel in Spark NLP:</p>', unsafe_allow_html=True)
69
+
70
+ # Setup Instructions
71
+ st.markdown('<div class="sub-title">Setup</div>', unsafe_allow_html=True)
72
+ st.markdown('<p>To use Spark NLP for sarcasm detection, follow these setup instructions:</p>', unsafe_allow_html=True)
73
+ st.code("""
74
+ pip install spark-nlp
75
+ pip install pyspark
76
+ """, language="bash")
77
+
78
+ st.markdown("<p>Then, import Spark NLP and start a Spark session:</p>", unsafe_allow_html=True)
79
+ st.code("""
80
+ import sparknlp
81
+
82
+ # Start Spark Session
83
+ spark = sparknlp.start()
84
+ """, language='python')
85
+
86
+ # Sentiment Analysis Example
87
+ st.markdown('<div class="sub-title">Example Usage: Sarcasm Detection with ClassifierDLModel</div>', unsafe_allow_html=True)
88
# Sample code displayed to the reader. The DataFrame is built once and reused
# for fit/transform, and the return value of .show() (which is None) is no
# longer assigned to `result`.
st.code('''
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import UniversalSentenceEncoder, ClassifierDLModel
from pyspark.ml import Pipeline

# Step 1: DocumentAssembler
document_assembler = DocumentAssembler() \\
    .setInputCol("text") \\
    .setOutputCol("document")

# Step 2: UniversalSentenceEncoder
use = UniversalSentenceEncoder.pretrained() \\
    .setInputCols(["document"]) \\
    .setOutputCol("sentence_embeddings")

# Step 3: ClassifierDLModel for Sarcasm Detection
sentimentdl = ClassifierDLModel.pretrained('classifierdl_use_sarcasm') \\
    .setInputCols(["sentence_embeddings"]) \\
    .setOutputCol("sentiment")

# Define the NLP Pipeline
nlpPipeline = Pipeline(stages=[document_assembler, use, sentimentdl])

# Example Text
text = "Oh, great! Another meeting scheduled for Friday afternoon. That's just what I needed."

# Process the text through the pipeline
data = spark.createDataFrame([[text]]).toDF("text")
result = nlpPipeline.fit(data).transform(data)
result.select('text', 'sentiment.result').show(truncate=False)
''', language='python')
117
+
118
+ st.text("""
119
+ +-------------------------------------------------------------------------------------+---------+
120
+ |text |result |
121
+ +-------------------------------------------------------------------------------------+---------+
122
+ |Oh, great! Another meeting scheduled for Friday afternoon. That's just what I needed.|[sarcasm]|
123
+ +-------------------------------------------------------------------------------------+---------+
124
+ """)
125
+
126
+ st.markdown("""
127
+ <p>The above example demonstrates how to use Spark NLP's ClassifierDLModel to detect sarcasm in text using Universal Sentence Encoder embeddings.</p>
128
+ """, unsafe_allow_html=True)
129
+
130
+ # Benchmarking
131
+ st.markdown('<div class="sub-title">Benchmarking</div>', unsafe_allow_html=True)
132
# Benchmark table for the sarcasm model. The <pre> element is now explicitly
# closed before its enclosing <div>, and the report columns are aligned.
st.markdown("""
<div class="section">
<p>Performance metrics of the sarcasm detection model:</p>
<pre>
              precision    recall  f1-score   support

      normal       0.98      0.89      0.93       495
     sarcasm       0.60      0.91      0.73        93

    accuracy                           0.89       588
   macro avg       0.79      0.90      0.83       588
weighted avg       0.92      0.89      0.90       588
</pre>
</div>
""", unsafe_allow_html=True)
146
+
147
+ # Conclusion
148
+ st.markdown("""
149
+ <div class="section">
150
+ <h2>Conclusion</h2>
151
+ <p>In this app, we explored how Spark NLP's ClassifierDLModel can be used to detect sarcasm in text. This capability enhances sentiment analysis and contextual understanding in various applications, improving the accuracy of natural language processing tasks.</p>
152
+ </div>
153
+ """, unsafe_allow_html=True)
154
+
155
+ # References and Additional Information
156
+ st.markdown('<div class="sub-title">For additional information, please check the following references.</div>', unsafe_allow_html=True)
157
+
158
+ st.markdown("""
159
+ <div class="section">
160
+ <ul>
161
+ <li>Documentation :&nbsp;<a class="link" href="https://nlp.johnsnowlabs.com/docs/en/annotators#classifierdlmodel" target="_blank" rel="noopener">ClassifierDLModel</a></li>
162
+ <li>Python Docs :&nbsp;<a class="link" href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp.annotator.ClassifierDLModel.html" target="_blank" rel="noopener">ClassifierDLModel</a></li>
163
+ <li>Model Used :&nbsp;<a class="link" href="https://sparknlp.org/2021/01/09/classifierdl_use_sarcasm_en.html" target="_blank" rel="noopener">classifierdl_use_sarcasm</a></li>
164
+ </ul>
165
+ </div>
166
+ """, unsafe_allow_html=True)
167
+
168
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
169
+ st.markdown("""
170
+ <div class="section">
171
+ <ul>
172
+ <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
173
+ <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
174
+ <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
175
+ <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
176
+ <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
177
+ </ul>
178
+ </div>
179
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ spark-nlp
5
+ pyspark