Spaces:
Sleeping
Sleeping
Upload 26 files
Browse files- .streamlit/config.toml +3 -0
- Demo.py +128 -0
- Dockerfile +70 -0
- images/johnsnowlabs-toxic-output.png +0 -0
- inputs/sentimentdl_use_imdb/Example1.txt +2 -0
- inputs/sentimentdl_use_imdb/Example10.txt +2 -0
- inputs/sentimentdl_use_imdb/Example2.txt +2 -0
- inputs/sentimentdl_use_imdb/Example3.txt +2 -0
- inputs/sentimentdl_use_imdb/Example4.txt +2 -0
- inputs/sentimentdl_use_imdb/Example5.txt +2 -0
- inputs/sentimentdl_use_imdb/Example6.txt +2 -0
- inputs/sentimentdl_use_imdb/Example7.txt +2 -0
- inputs/sentimentdl_use_imdb/Example8.txt +2 -0
- inputs/sentimentdl_use_imdb/Example9.txt +2 -0
- inputs/sentimentdl_use_twitter/Example1.txt +2 -0
- inputs/sentimentdl_use_twitter/Example10.txt +2 -0
- inputs/sentimentdl_use_twitter/Example2.txt +2 -0
- inputs/sentimentdl_use_twitter/Example3.txt +2 -0
- inputs/sentimentdl_use_twitter/Example4.txt +2 -0
- inputs/sentimentdl_use_twitter/Example5.txt +2 -0
- inputs/sentimentdl_use_twitter/Example6.txt +2 -0
- inputs/sentimentdl_use_twitter/Example7.txt +2 -0
- inputs/sentimentdl_use_twitter/Example8.txt +2 -0
- inputs/sentimentdl_use_twitter/Example9.txt +2 -0
- pages/Workflow & Model Overview.py +213 -0
- requirements.txt +5 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
base="light"
|
3 |
+
primaryColor="#29B4E8"
|
Demo.py
ADDED
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import sparknlp
|
3 |
+
import os
|
4 |
+
import pandas as pd
|
5 |
+
|
6 |
+
from sparknlp.base import *
|
7 |
+
from sparknlp.annotator import *
|
8 |
+
from pyspark.ml import Pipeline
|
9 |
+
from sparknlp.pretrained import PretrainedPipeline
|
10 |
+
|
11 |
+
# Page configuration
|
12 |
+
st.set_page_config(
|
13 |
+
layout="wide",
|
14 |
+
page_title="Spark NLP Demos App",
|
15 |
+
initial_sidebar_state="auto"
|
16 |
+
)
|
17 |
+
|
18 |
+
# CSS for styling
|
19 |
+
st.markdown("""
|
20 |
+
<style>
|
21 |
+
.main-title {
|
22 |
+
font-size: 36px;
|
23 |
+
color: #4A90E2;
|
24 |
+
font-weight: bold;
|
25 |
+
text-align: center;
|
26 |
+
}
|
27 |
+
.section p, .section ul {
|
28 |
+
color: #666666;
|
29 |
+
}
|
30 |
+
</style>
|
31 |
+
""", unsafe_allow_html=True)
|
32 |
+
|
33 |
+
@st.cache_resource
def init_spark():
    """Start Spark NLP and return the resulting Spark session.

    The function is decorated with ``st.cache_resource`` so that Streamlit
    can reuse the returned session instead of calling ``sparknlp.start()``
    again on every script rerun.
    """
    session = sparknlp.start()
    return session
|
36 |
+
|
37 |
+
@st.cache_resource
def create_pipeline(model):
    """Build the unfitted Spark ML pipeline used for comment classification.

    Args:
        model: Name of the pretrained English ``BertForSequenceClassification``
            model to load, e.g. ``"bert_classifier_toxic"``. The value also
            keys the ``st.cache_resource`` cache, so each distinct model name
            gets its own pipeline.

    Returns:
        A ``Pipeline`` whose stages are DocumentAssembler -> Tokenizer ->
        BertForSequenceClassification. It reads the ``text`` column and
        writes predictions to the ``class`` column.
    """
    document_assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    tokenizer = (
        Tokenizer()
        .setInputCols("document")
        .setOutputCol("token")
    )

    # Load the model the caller selected. The name used to be hard-coded to
    # "bert_classifier_toxic", which silently ignored the `model` argument.
    sequence_classifier = (
        BertForSequenceClassification.pretrained(model, "en")
        .setInputCols(["document", "token"])
        .setOutputCol("class")
    )

    return Pipeline(stages=[document_assembler, tokenizer, sequence_classifier])
|
54 |
+
|
55 |
+
def fit_data(pipeline, data):
    """Run ``pipeline`` on ``data`` and return the first predicted label.

    Relies on the module-level ``spark`` session to build the one-row
    placeholder DataFrame that the pipeline is fitted on.

    Args:
        pipeline: Spark ML ``Pipeline`` to fit; its output must include a
            ``class`` column.
        data: Text handed to ``LightPipeline.fullAnnotate``.

    Returns:
        The ``result`` attribute of the first ``class`` annotation in the
        first ``fullAnnotate`` result.
    """
    placeholder_df = spark.createDataFrame([[""]]).toDF("text")
    light_model = LightPipeline(pipeline.fit(placeholder_df))
    first_result = light_model.fullAnnotate(data)[0]
    class_annotations = first_result["class"]
    return class_annotations[0].result
|
61 |
+
|
62 |
+
# Set up the page layout
|
63 |
+
st.markdown('<div class="main-title">State-of-the-Art Detecting Toxic Comments with Spark NLP</div>', unsafe_allow_html=True)
|
64 |
+
|
65 |
+
# Sidebar content
|
66 |
+
model = st.sidebar.selectbox(
|
67 |
+
"Choose the pretrained model",
|
68 |
+
["bert_classifier_toxic"],
|
69 |
+
help="For more info about the models visit: https://sparknlp.org/models"
|
70 |
+
)
|
71 |
+
|
72 |
+
# Reference notebook link in sidebar
|
73 |
+
link = """
|
74 |
+
<a href="https://github.com/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/BertForSequenceClassification.ipynb">
|
75 |
+
<img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
|
76 |
+
</a>
|
77 |
+
"""
|
78 |
+
st.sidebar.markdown('Reference notebook:')
|
79 |
+
st.sidebar.markdown(link, unsafe_allow_html=True)
|
80 |
+
|
81 |
+
# Load examples
|
82 |
+
examples = [
|
83 |
+
"Streatham again Thank you for the copyedits I have been considering withdrawing the nomination owing to the nonconstructive arguing with a certain reviewer but your edits make me think I cant let everyone down Thank you",
|
84 |
+
"Do you know how pathetic you look with your silly little crusade of removing England or English with articles? Grow up and find something better to do with your life.",
|
85 |
+
"it is obvius to, me, a black man, that orange is for da black Gs, blue is for mexicans and yellow is 4 whites. coz whites look like woman. and mexicans try to compete with da nigger Gs but we r too strong. but in realty the whites also lose to da black man.",
|
86 |
+
"She should stop sticking her tongue out before someone rubs their ass on it. Filthy bitch!!!",
|
87 |
+
"personal attacks dont tell me what i can and can not do i'll come over there right now and throw a brick through ur window",
|
88 |
+
"First i vandalized his page, then i vandalized again, and then his IP adress was blocked hahahahahahahahahahahahahahahahahahaha. This plan is so evil and yet so much fun.",
|
89 |
+
"Silly guy. You should use email like the wikijews",
|
90 |
+
"""Quick question about the section re-ordering... Why was Antagonists switched with Supporting, and why were all ""bad"" characters removed from the Supporting section? (converse) " """,
|
91 |
+
"I will respond until Armenian scum will stop",
|
92 |
+
"Isn't tart a kind of whore or concubine or courtesane Eg she's a politician's tart",
|
93 |
+
"....thats why I kill you and your father and your mother and you brother",
|
94 |
+
"You're ridiculous It's obvious that you have a problem with people with money and education. Leave the Horace Mann page alone. You know nothing of the school whatsoever. HM doesn't admit dirt."
|
95 |
+
|
96 |
+
]
|
97 |
+
|
98 |
+
st.subheader("Classify comments and tweets into Toxic, Insults, Hate, Obscene, Threat.")

selected_text = st.selectbox("Select a sample", examples)
custom_input = st.text_input("Try it for yourself!")

# Free-text input, when provided, takes precedence over the sample chosen in
# the dropdown; otherwise the dropdown selection is used unchanged.
if custom_input:
    selected_text = custom_input

st.subheader('Selected Text')
st.write(selected_text)

# Initialize Spark and create pipeline.
# `spark` must stay a module-level name: fit_data() reads it as a global.
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)
|
115 |
+
|
116 |
+
# Display output sentence.
# Each known label maps to (article, highlight colour, wording, emoji).
# A wording of None means the raw label itself is shown.
_VERDICT_STYLES = {
    'severe_toxic': ('a', '#209DDC', None, '🤬'),
    'toxic': ('a', '#B64434', None, '🤬'),
    'insult': ('an', '#B64434', 'insulting', '😰'),
    'identity_hate': ('a', '#B64434', None, '😰'),
    'obscene': ('an', '#B64434', None, '🤬'),
    'threat': ('a', '#B64434', 'threatening', '🤬'),
}

_verdict = _VERDICT_STYLES.get(output)
# Labels outside the table render nothing, exactly as the if/elif chain did.
if _verdict is not None:
    _article, _colour, _wording, _emoji = _verdict
    # The 'severe_toxic' branch used to format with the undefined name `out`,
    # which raised NameError; every branch now formats from `output`.
    st.markdown(
        """<h3>This seems like {} <span style="color: {}">{}</span> tweet. <span style="font-size:35px;">{}</span></h3>""".format(
            _article, _colour, _wording or output, _emoji
        ),
        unsafe_allow_html=True,
    )
|
Dockerfile
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Download base image ubuntu 18.04
|
2 |
+
FROM ubuntu:18.04
|
3 |
+
|
4 |
+
# Set environment variables
|
5 |
+
ENV NB_USER jovyan
|
6 |
+
ENV NB_UID 1000
|
7 |
+
ENV HOME /home/${NB_USER}
|
8 |
+
|
9 |
+
# Install required packages
|
10 |
+
RUN apt-get update && apt-get install -y \
|
11 |
+
tar \
|
12 |
+
wget \
|
13 |
+
bash \
|
14 |
+
rsync \
|
15 |
+
gcc \
|
16 |
+
libfreetype6-dev \
|
17 |
+
libhdf5-serial-dev \
|
18 |
+
libpng-dev \
|
19 |
+
libzmq3-dev \
|
20 |
+
python3 \
|
21 |
+
python3-dev \
|
22 |
+
python3-pip \
|
23 |
+
unzip \
|
24 |
+
pkg-config \
|
25 |
+
software-properties-common \
|
26 |
+
graphviz \
|
27 |
+
openjdk-8-jdk \
|
28 |
+
ant \
|
29 |
+
ca-certificates-java \
|
30 |
+
&& apt-get clean \
|
31 |
+
&& update-ca-certificates -f;
|
32 |
+
|
33 |
+
# Install Python 3.8 and pip
|
34 |
+
RUN add-apt-repository ppa:deadsnakes/ppa \
|
35 |
+
&& apt-get update \
|
36 |
+
&& apt-get install -y python3.8 python3-pip \
|
37 |
+
&& apt-get clean;
|
38 |
+
|
39 |
+
# Set up JAVA_HOME
|
40 |
+
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
|
41 |
+
RUN mkdir -p ${HOME} \
|
42 |
+
&& echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
|
43 |
+
&& chown -R ${NB_UID}:${NB_UID} ${HOME}
|
44 |
+
|
45 |
+
# Create a new user named "jovyan" with user ID 1000
|
46 |
+
RUN useradd -m -u ${NB_UID} ${NB_USER}
|
47 |
+
|
48 |
+
# Switch to the "jovyan" user
|
49 |
+
USER ${NB_USER}
|
50 |
+
|
51 |
+
# Set home and path variables for the user
|
52 |
+
ENV HOME=/home/${NB_USER} \
|
53 |
+
PATH=/home/${NB_USER}/.local/bin:$PATH
|
54 |
+
|
55 |
+
# Set the working directory to the user's home directory
|
56 |
+
WORKDIR ${HOME}
|
57 |
+
|
58 |
+
# Upgrade pip and install Python dependencies
|
59 |
+
RUN python3.8 -m pip install --upgrade pip
|
60 |
+
COPY requirements.txt /tmp/requirements.txt
|
61 |
+
RUN python3.8 -m pip install -r /tmp/requirements.txt
|
62 |
+
|
63 |
+
# Copy the application code into the container at /home/jovyan
|
64 |
+
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
|
65 |
+
|
66 |
+
# Expose port for Streamlit
|
67 |
+
EXPOSE 7860
|
68 |
+
|
69 |
+
# Define the entry point for the container
|
70 |
+
ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
|
images/johnsnowlabs-toxic-output.png
ADDED
![]() |
inputs/sentimentdl_use_imdb/Example1.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
Demonicus is a movie turned into a video game! I just love the story and the things that goes on in ...
|
2 |
+
Demonicus is a movie turned into a video game! I just love the story and the things that goes on in the film.It is a B-film ofcourse but that doesn`t bother one bit because its made just right and the music was rad! Horror and sword fight freaks,buy this movie now!
|
inputs/sentimentdl_use_imdb/Example10.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
This is to the Zatoichi movies as the "Star Trek" movies were to "Star Trek"--except that in this ca...
|
2 |
+
This is to the Zatoichi movies as the "Star Trek" movies were to "Star Trek"--except that in this case every one of the originals was more entertaining and interesting than this big, shiny re-do, and also better made, if substance is more important than surface. Had I never seen them, I would have thought this good-looking but empty; since I had, I thought its style inappropriate and its content insufficient. The idea of reviving the character in a bigger, slicker production must have sounded good, but there was no point in it, other than the hope of making money; it's just a show, which mostly fails to capture the atmosphere of the character's world and wholly fails to take the character anywhere he hasn't been already (also, the actor wasn't at his best). I'd been hoping to see Ichi at a late stage of life, in a story that would see him out gracefully and draw some conclusion from his experience overall; this just rehashes bits and pieces from the other movies, seasoned with more sex and sfx violence. Not the same experience at all.
|
inputs/sentimentdl_use_imdb/Example2.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
Back when Alec Baldwin and Kim Basinger were a mercurial, hot-tempered, high-powered Hollywood coupl...
|
2 |
+
Back when Alec Baldwin and Kim Basinger were a mercurial, hot-tempered, high-powered Hollywood couple they filmed this (nearly) scene-for-scene remake of the 1972 Steve McQueen-Ali MacGraw action-thriller about a fugitive twosome. It almost worked the first time because McQueen was such a vital presence on the screen--even stone silent and weary, you could sense his clock ticking, his cagey magnetism. Baldwin is not in Steve McQueen's league, but he has his charms and is probably a more versatile actor--if so, this is not a showcase for his attributes. Basinger does well and certainly looks good, but James Woods is artificially hammy in a silly mob-magnet role. A sub-plot involving another couple taken hostage by Baldwin's ex-partner was unbearable in the '72 film and plays even worse here. As for the action scenes, they're pretty old hat, which causes one to wonder: why even remake the original?
|
inputs/sentimentdl_use_imdb/Example3.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
Despite a tight narrative, Johnnie To's Election feels at times like it was once a longer picture, w...
|
2 |
+
Despite a tight narrative, Johnnie To's Election feels at times like it was once a longer picture, with many characters and plot strands abandoned or ultimately unresolved. Some of these are dealt with in the truly excellent and far superior sequel, Election 2: Harmony is a Virtue, but it's still a dependably enthralling thriller about a contested Triad election that bypasses the usual shootouts and explosions (though not the violence) in favour of constantly shifting alliances that can turn in the time it takes to make a phone call. It's also a film where the most ruthless character isn't always the most threatening one, as the chilling ending makes only too clear: one can imagine a lifetime of psychological counselling being necessary for all the trauma that one inflicts on one unfortunate bystander. Simon Yam, all too often a variable actor but always at his best under To's direction, has possibly never been better in the lead, not least because Tony Leung's much more extrovert performance makes his stillness more the powerful.
|
inputs/sentimentdl_use_imdb/Example4.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
This movie has successfully proved what we all already know, that professional basket-ball players s...
|
2 |
+
This movie has successfully proved what we all already know, that professional basket-ball players suck at everything besides playing basket-ball. Especially rapping and acting. I can not even begin to describe how bad this movie truly is. First of all, is it just me, or is that the ugliest kid you have ever seen? I mean, his teeth could be used as a can-opener. Secondly, why would a genie want to pursue a career in the music industry when, even though he has magical powers, he sucks horribly at making music? Third, I have read the Bible. In no way shape or form did it say that Jesus made genies. Fourth, what was the deal with all the crappy special effects? I assure you that any acne-addled nerdy teenager with a computer could make better effects than that. Fifth, why did the ending suck so badly? And what the hell is a djin? And finally, whoever created the nightmare known as Kazaam needs to be thrown off of a plane and onto the Eiffel Tower, because this movie take the word "suck" to an entirely new level.
|
inputs/sentimentdl_use_imdb/Example5.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
The fluttering of butterfly wings in the Atlantic can unleash a hurricane in the Pacific. According ...
|
2 |
+
The fluttering of butterfly wings in the Atlantic can unleash a hurricane in the Pacific. According to this theory (somehow related to the Chaos Theory, I'm not sure exactly how), every action, no matter how small or insignificant, will start a chain reaction that can lead to big events. This small jewel of a film shows us a series of seemingly-unrelated characters, most of them in Paris, whose actions will affect each others' lives. (The six-degrees-of-separation theory can be applied as well.) Each story is a facet of the jewel that is this film. The acting is finely-tuned and nuanced (Audrey Tautou is luminous), the stories mesh plausibly, the humor is just right, and the viewer leaves the theatre nodding in agreement.
|
inputs/sentimentdl_use_imdb/Example6.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
There have been very few films I have not been able to sit through. I made it through Battle Field E...
|
2 |
+
There have been very few films I have not been able to sit through. I made it through Battle Field Earth no problem. But this, This is one of the single worst films EVER to be made. I understand Whoopi Goldberg tried to get of acting in it. I do not blame her. I would feel ashamed to have this on a resume. I belive it is a rare occasion when almost every gag in a film falls flat on it's face. Well it happens here. Not to mention the SFX, look for the dino with the control cables hanging out of it rear end!!!!!! Halfway through the film I was still looking for a plot. I never found one. Save yourself the trouble of renting this and save 90 minutes of your life.
|
inputs/sentimentdl_use_imdb/Example7.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
After a long hard week behind the desk making all those dam serious decisions this movie is a great ...
|
2 |
+
After a long hard week behind the desk making all those dam serious decisions this movie is a great way to relax. Like Wells and the original radio broadcast this movie will take you away to a land of alien humor and sci-fi paraday. 'Captain Zippo died in the great charge of the Buick. He was a brave man.' The Jack Nicholson impressions shine right through that alien face with the dark sun glasses and leather jacket. And always remember to beware of the 'doughnut of death!' Keep in mind the number one rule of this movie - suspension of disbelief - sit back and relax - and 'Prepare to die Earth Scum!' You just have to see it for yourself.
|
inputs/sentimentdl_use_imdb/Example8.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
When Ritchie first burst on to movie scene his films were hailed as funny, witty, well directed and ...
|
2 |
+
When Ritchie first burst on to movie scene his films were hailed as funny, witty, well directed and original. If one could compare the hype he had generated with his first two attempts and the almost universal loathing his last two outings have created one should consider - has Ritchie been found out? Is he really that talented? Does he really have any genuine original ideas? Or is he simply a pretentious and egotistical director who really wants to be Fincher, Tarantino and Leone all rolled into one colossal and disorganised heap? After watching Revolver one could be excused for thinking were did it all go wrong? What happened to his great sense of humour? Where did he get all these mixed and convoluted ideas from? Revolver tries to be clever, philosophical and succinct, it tries to be an intelligent psychoanalysis, it tries to be an intricate and complicated thriller. Ritchie does make a gargantuan effort to fulfil all these many objectives and invests great chunks of a script into existential musings and numerous plot twists. However, in the end all it serves is to construct a severely disjointed, unstructured and ultimately unfriendly film to the audience. Its plagiarism is so sinful and blatant that although Ritchie does at least attempt to give his own spin he should be punished for even trying to pass it off as his own work. So what the audience gets ultimately is a terrible screenplay intertwined with many pretentious oneliners and clumsy setpieces.<br /><br />Revolver is ultimately an unoriginal and bland movie that has stolen countless themes from masterpieces like Fight Club, Usual Suspects and Pulp Fiction. It aims high, but inevitably shots blanks aplenty.<br /><br />Revolver deserves to be lambasted, it is a truly poor film masquerading as a wannabe masterpiece from a wannabe auteur. However, it falls flat on its farcical face and just fails at everything it wants to be and achieve.
|
inputs/sentimentdl_use_imdb/Example9.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
I always thought this would be a long and boring Talking-Heads flick full of static interior takes, ...
|
2 |
+
I always thought this would be a long and boring Talking-Heads flick full of static interior takes, dude, I was wrong. "Election" is a highly fascinating and thoroughly captivating thriller-drama, taking a deep and realistic view behind the origins of Triads-Rituals. Characters are constantly on the move, and although as a viewer you kinda always remain an outsider, it's still possible to feel the suspense coming from certain decisions and ambitions of the characters. Furthermore Johnnie To succeeds in creating some truly opulent images due to meticulously composed lighting and atmospheric light-shadow contrasts. Although there's hardly any action, the ending is still shocking in it's ruthless depicting of brutality. Cool movie that deserves more attention, and I came to like the minimalistic acoustic guitar score quite a bit.
|
inputs/sentimentdl_use_twitter/Example1.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
@Mbjthegreat i really dont want AT&T phone service..they suck when it comes to having a signal...
|
2 |
+
@Mbjthegreat i really dont want AT&T phone service..they suck when it comes to having a signal
|
inputs/sentimentdl_use_twitter/Example10.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
@PDubyaD right!!! LOL we'll get there!! I have high expectations, Warren Buffet style....
|
2 |
+
@PDubyaD right!!! LOL we'll get there!! I have high expectations, Warren Buffet style.
|
inputs/sentimentdl_use_twitter/Example2.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
holy crap. I take a nap for 4 hours and Pitchfork blows up my twitter dashboard. I wish I was at Coa...
|
2 |
+
holy crap. I take a nap for 4 hours and Pitchfork blows up my twitter dashboard. I wish I was at Coachella.
|
inputs/sentimentdl_use_twitter/Example3.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
@Susy412 he is working today ive tried that still not working..... hmmmm!! im rubbish with computer...
|
2 |
+
@Susy412 he is working today ive tried that still not working..... hmmmm!! im rubbish with computers haha!
|
inputs/sentimentdl_use_twitter/Example4.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
Brand New Canon EOS 50D 15MP DSLR Camera Canon 17-85mm IS Lens ...: Web Technology Thread, Brand New...
|
2 |
+
Brand New Canon EOS 50D 15MP DSLR Camera Canon 17-85mm IS Lens ...: Web Technology Thread, Brand New Canon EOS 5.. http://u.mavrev.com/5a3t
|
inputs/sentimentdl_use_twitter/Example5.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
Watching a programme about the life of Hitler, its only enhancing my geekiness of history....
|
2 |
+
Watching a programme about the life of Hitler, its only enhancing my geekiness of history.
|
inputs/sentimentdl_use_twitter/Example6.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
GM says expects announcment on sale of Hummer soon - Reuters: WDSUGM says expects announcment on sal...
|
2 |
+
GM says expects announcment on sale of Hummer soon - Reuters: WDSUGM says expects announcment on sale of Hummer .. http://bit.ly/4E1Fv
|
inputs/sentimentdl_use_twitter/Example7.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
@accannis @edog1203 Great Stanford course. Thanks for making it available to the public! Really help...
|
2 |
+
@accannis @edog1203 Great Stanford course. Thanks for making it available to the public! Really helpful and informative for starting off!
|
inputs/sentimentdl_use_twitter/Example8.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
@the_real_usher LeBron is cool. I like his personality...he has good character....
|
2 |
+
@the_real_usher LeBron is cool. I like his personality...he has good character.
|
inputs/sentimentdl_use_twitter/Example9.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
@sketchbug Lebron is a hometown hero to me, lol I love the Lakers but let's go Cavs, lol...
|
2 |
+
@sketchbug Lebron is a hometown hero to me, lol I love the Lakers but let's go Cavs, lol
|
pages/Workflow & Model Overview.py
ADDED
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
# Custom CSS for better styling
|
4 |
+
st.markdown("""
|
5 |
+
<style>
|
6 |
+
.main-title {
|
7 |
+
font-size: 36px;
|
8 |
+
color: #4A90E2;
|
9 |
+
font-weight: bold;
|
10 |
+
text-align: center;
|
11 |
+
}
|
12 |
+
.sub-title {
|
13 |
+
font-size: 24px;
|
14 |
+
color: #4A90E2;
|
15 |
+
margin-top: 20px;
|
16 |
+
}
|
17 |
+
.section {
|
18 |
+
background-color: #f9f9f9;
|
19 |
+
padding: 15px;
|
20 |
+
border-radius: 10px;
|
21 |
+
margin-top: 20px;
|
22 |
+
}
|
23 |
+
.section h2 {
|
24 |
+
font-size: 22px;
|
25 |
+
color: #4A90E2;
|
26 |
+
}
|
27 |
+
.section p, .section ul {
|
28 |
+
color: #666666;
|
29 |
+
}
|
30 |
+
.link {
|
31 |
+
color: #4A90E2;
|
32 |
+
text-decoration: none;
|
33 |
+
}
|
34 |
+
</style>
|
35 |
+
""", unsafe_allow_html=True)
|
36 |
+
|
37 |
+
# Introduction
|
38 |
+
st.markdown('<div class="main-title">Detecting Toxic Comments with Spark NLP</div>', unsafe_allow_html=True)
|
39 |
+
|
40 |
+
st.markdown("""
|
41 |
+
<div class="section">
|
42 |
+
<p>Welcome to the Spark NLP Toxic Comment Detection Demo App! Discussing things you care about can be difficult. The threat of abuse and harassment online means that many people stop expressing themselves and give up on seeking different opinions. Platforms struggle to effectively facilitate conversations, leading many communities to limit or completely shut down user comments.</p>
|
43 |
+
<p>This app demonstrates how to use Spark NLP's MulticlassifierDL to automatically detect toxic comments, including categories like identity hate, insult, obscene, severe toxic, and threat.</p>
|
44 |
+
</div>
|
45 |
+
""", unsafe_allow_html=True)
|
46 |
+
|
47 |
+
# st.image('images/Toxic-Comments.jpg', caption="Different types of toxic comments detected using Spark NLP", use_column_width='auto')
|
48 |
+
|
49 |
+
# About Toxic Comment Classification
|
50 |
+
st.markdown('<div class="sub-title">About Toxic Comment Classification</div>', unsafe_allow_html=True)
|
51 |
+
st.markdown("""
|
52 |
+
<div class="section">
|
53 |
+
<p>The Conversation AI team, a research initiative founded by Jigsaw and Google (both part of Alphabet), is working on tools to help improve online conversations. One area of focus is the study of negative online behaviors, like toxic comments (comments that are rude, disrespectful, or likely to make someone leave a discussion).</p>
|
54 |
+
<p>This app utilizes the Spark NLP MulticlassifierDL model to detect various types of toxicity in comments. This model is capable of identifying and categorizing toxic comments into different classes such as toxic, severe toxic, identity hate, insult, obscene, and threat.</p>
|
55 |
+
</div>
|
56 |
+
""", unsafe_allow_html=True)
|
57 |
+
|
58 |
+
# Using MulticlassifierDL in Spark NLP
|
59 |
+
st.markdown('<div class="sub-title">Using MulticlassifierDL in Spark NLP</div>', unsafe_allow_html=True)
|
60 |
+
st.markdown("""
|
61 |
+
<div class="section">
|
62 |
+
<p>The MulticlassifierDL annotator in Spark NLP uses deep learning to classify text into multiple categories. This approach allows for a more nuanced understanding of the toxicity in comments, providing better tools for moderating online discussions.</p>
|
63 |
+
<p>Spark NLP also offers other annotators and models for different NLP tasks. If you are interested in exploring more, please check the <a class="link" href="https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl" target="_blank" rel="noopener">MulticlassifierDL</a> documentation.</p>
|
64 |
+
</div>
|
65 |
+
""", unsafe_allow_html=True)
|
66 |
+
|
67 |
+
st.markdown('<h2 class="sub-title">Example Usage in Python</h2>', unsafe_allow_html=True)
|
68 |
+
st.markdown('<p>Here’s how you can implement toxic comment classification using the MulticlassifierDL annotator in Spark NLP:</p>', unsafe_allow_html=True)
|
69 |
+
|
70 |
+
# Setup Instructions
|
71 |
+
st.markdown('<div class="sub-title">Setup</div>', unsafe_allow_html=True)
|
72 |
+
st.markdown('<p>To install Spark NLP in Python, use your favorite package manager (conda, pip, etc.). For example:</p>', unsafe_allow_html=True)
|
73 |
+
st.code("""
|
74 |
+
pip install spark-nlp
|
75 |
+
pip install pyspark
|
76 |
+
""", language="bash")
|
77 |
+
|
78 |
+
st.markdown("<p>Then, import Spark NLP and start a Spark session:</p>", unsafe_allow_html=True)
|
79 |
+
st.code("""
|
80 |
+
import sparknlp
|
81 |
+
|
82 |
+
# Start Spark Session
|
83 |
+
spark = sparknlp.start()
|
84 |
+
""", language='python')
|
85 |
+
|
86 |
+
# Toxic Comment Classification Example
|
87 |
+
st.markdown('<div class="sub-title">Example Usage: Toxic Comment Classification with MulticlassifierDL</div>', unsafe_allow_html=True)
|
88 |
+
st.code('''
|
89 |
+
from sparknlp.base import DocumentAssembler
|
90 |
+
from sparknlp.annotator import UniversalSentenceEncoder, MultiClassifierDLModel
|
91 |
+
from pyspark.ml import Pipeline
|
92 |
+
|
93 |
+
# Step 1: Transforms raw texts to document annotation
|
94 |
+
document = DocumentAssembler() \\
|
95 |
+
.setInputCol("text") \\
|
96 |
+
.setOutputCol("document")
|
97 |
+
|
98 |
+
# Step 2: Use Universal Sentence Encoder for embeddings
|
99 |
+
use = UniversalSentenceEncoder.pretrained() \\
|
100 |
+
.setInputCols(["document"]) \\
|
101 |
+
.setOutputCol("use_embeddings")
|
102 |
+
|
103 |
+
# Step 3: Multi-label classification model
|
104 |
+
docClassifier = MultiClassifierDLModel.pretrained("multiclassifierdl_use_toxic") \\
|
105 |
+
.setInputCols(["use_embeddings"]) \\
|
106 |
+
.setOutputCol("category") \\
|
107 |
+
.setThreshold(0.5)
|
108 |
+
|
109 |
+
# Define the pipeline
|
110 |
+
pipeline = Pipeline(
|
111 |
+
stages = [
|
112 |
+
document,
|
113 |
+
use,
|
114 |
+
docClassifier
|
115 |
+
]
|
116 |
+
)
|
117 |
+
|
118 |
+
# Create a Spark Data Frame with example sentences
|
119 |
+
data = spark.createDataFrame(
|
120 |
+
[
|
121 |
+
["She should stop sticking her tongue out before someone rubs their ass on it. Filthy bitch!!!"]
|
122 |
+
]
|
123 |
+
).toDF("text") # use the column name `text` defined in the pipeline as input
|
124 |
+
|
125 |
+
# Fit-transform to get predictions
|
126 |
+
pipeline.fit(data).transform(data).select("text", "category.result").show(truncate=50)
|
127 |
+
''', language='python')
|
128 |
+
|
129 |
+
st.text("""
|
130 |
+
+--------------------------------------------------+------------------------+
|
131 |
+
| text| result|
|
132 |
+
+--------------------------------------------------+------------------------+
|
133 |
+
|She should stop sticking her tongue out before ...|[toxic, insult, obscene]|
|
134 |
+
+--------------------------------------------------+------------------------+
|
135 |
+
""")
|
136 |
+
|
137 |
+
st.markdown("""
|
138 |
+
<p>The code snippet demonstrates how to set up a pipeline in Spark NLP to classify toxic comments using the MulticlassifierDL annotator. The resulting DataFrame contains the predictions for each comment.</p>
|
139 |
+
""", unsafe_allow_html=True)
|
140 |
+
|
141 |
+
# One-liner Alternative
|
142 |
+
st.markdown('<div class="sub-title">One-liner Alternative</div>', unsafe_allow_html=True)
|
143 |
+
st.markdown("""
|
144 |
+
<div class="section">
|
145 |
+
<p>In October 2022, John Snow Labs released the open-source <code>johnsnowlabs</code> library that contains all the company’s products, open-source and licensed, under one common library. This simplified the workflow, especially for users working with more than one of the libraries (e.g., Spark NLP + Healthcare NLP). This new library is a wrapper around all of John Snow Labs’ libraries and can be installed with pip:</p>
|
146 |
+
<p><code>pip install johnsnowlabs</code></p>
|
147 |
+
</div>
|
148 |
+
""", unsafe_allow_html=True)
|
149 |
+
|
150 |
+
st.markdown('<p>To run toxic comment classification with one line of code, we can simply:</p>', unsafe_allow_html=True)
|
151 |
+
st.code("""
|
152 |
+
# Import the NLP module which contains Spark NLP and NLU libraries
|
153 |
+
from johnsnowlabs import nlp
|
154 |
+
|
155 |
+
sample_text = ["You are a horrible person!", "I love your new profile picture!", "Go away, no one likes you."]
|
156 |
+
|
157 |
+
# Returns a pandas DataFrame with the predictions for each sentence
|
158 |
+
nlp.load('en.classify.toxic').predict(sample_text, output_level='sentence')
|
159 |
+
""", language='python')
|
160 |
+
|
161 |
+
st.image('images/johnsnowlabs-toxic-output.png', use_column_width='auto')
|
162 |
+
|
163 |
+
st.markdown("""
|
164 |
+
<p>This approach demonstrates how to use the <code>johnsnowlabs</code> library to perform toxic comment classification with a single line of code. The resulting DataFrame contains the predictions for each comment.</p>
|
165 |
+
""", unsafe_allow_html=True)
|
166 |
+
|
167 |
+
# Benchmarking
|
168 |
+
st.markdown('<div class="sub-title">Benchmarking</div>', unsafe_allow_html=True)
|
169 |
+
st.markdown("""
|
170 |
+
<div class="section">
|
171 |
+
<p>Here are the benchmarking results for the MulticlassifierDL model on the toxic comment classification task:</p>
|
172 |
+
<pre>
|
173 |
+
precision recall f1-score support
|
174 |
+
|
175 |
+
0 0.56 0.30 0.39 127
|
176 |
+
1 0.71 0.70 0.70 761
|
177 |
+
2 0.76 0.72 0.74 824
|
178 |
+
3 0.55 0.21 0.31 147
|
179 |
+
4 0.79 0.38 0.51 50
|
180 |
+
5 0.94 1.00 0.97 1504
|
181 |
+
|
182 |
+
micro avg 0.83 0.80 0.81 3413
|
183 |
+
macro avg 0.72 0.55 0.60 3413
|
184 |
+
weighted avg 0.81 0.80 0.80 3413
|
185 |
+
samples avg 0.84 0.83 0.80 3413
|
186 |
+
|
187 |
+
F1 micro averaging: 0.8113432835820896
|
188 |
+
</pre></div>
|
189 |
+
""", unsafe_allow_html=True)
|
190 |
+
|
191 |
+
# Additional Resources
|
192 |
+
st.markdown('<div class="sub-title">Additional Resources</div>', unsafe_allow_html=True)
|
193 |
+
st.markdown("""
|
194 |
+
<div class="section">
|
195 |
+
<ul>
|
196 |
+
<li>Python Docs : <a class="link" href="https://nlp.johnsnowlabs.com/docs/en/annotators#multiclassifierdl" target="_blank" rel="noopener">MultiClassifierDLModel</a></li>
|
197 |
+
<li>Model used : <a class="link" href="https://sparknlp.org/2021/01/21/multiclassifierdl_use_toxic_en.html" target="_blank" rel="noopener">multiclassifierdl_use_toxic</a></li>
|
198 |
+
<li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
|
199 |
+
</ul>
|
200 |
+
</div>
|
201 |
+
""", unsafe_allow_html=True)
|
202 |
+
|
203 |
+
st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
|
204 |
+
st.markdown("""
|
205 |
+
<div class="section">
|
206 |
+
<ul>
|
207 |
+
<li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
|
208 |
+
<li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
|
209 |
+
<li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
|
210 |
+
<li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
|
211 |
+
</ul>
|
212 |
+
</div>
|
213 |
+
""", unsafe_allow_html=True)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
spark-nlp
|
5 |
+
pyspark
|