abdullahmubeen10 committed on
Commit
1611e10
·
verified ·
1 Parent(s): 5f9d3c2

Upload 6 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="light"
3
+ primaryColor="#29B4E8"
Demo.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+
6
+ from sparknlp.base import *
7
+ from sparknlp.annotator import *
8
+ from pyspark.ml import Pipeline
9
+ from sparknlp.pretrained import PretrainedPipeline
10
+
11
+ # Page configuration
12
+ st.set_page_config(
13
+ layout="wide",
14
+ page_title="Spark NLP Financial Sentiment Analysis",
15
+ initial_sidebar_state="expanded"
16
+ )
17
+
18
+ # CSS for styling
19
+ st.markdown("""
20
+ <style>
21
+ .main-title {
22
+ font-size: 36px;
23
+ color: #4A90E2;
24
+ font-weight: bold;
25
+ text-align: center;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ .result-positive {
31
+ color: green;
32
+ }
33
+ .result-negative {
34
+ color: red;
35
+ }
36
+ .result-neutral {
37
+ color: #209DDC;
38
+ }
39
+ </style>
40
+ """, unsafe_allow_html=True)
41
+
42
@st.cache_resource
def init_spark():
    """Start a Spark NLP session and return it.

    The function is wrapped in ``st.cache_resource``; it takes no arguments
    and simply hands back whatever ``sparknlp.start()`` returns.
    """
    session = sparknlp.start()
    return session
45
+
46
@st.cache_resource
def create_pipeline(model):
    """Build the (unfitted) Spark ML pipeline used for sentiment classification.

    Stages, in order:
      1. ``DocumentAssembler``      -- "text" column -> "document"
      2. ``BertSentenceEmbeddings`` -- "document" -> "sentence_embeddings"
      3. ``ClassifierDLModel``      -- "sentence_embeddings" -> "class_"

    Args:
        model: Name of the pretrained English ``ClassifierDLModel`` to load.
            Previously this argument was ignored and the classifier name was
            hard-coded; it is now honoured so the sidebar selection (which is
            also the ``st.cache_resource`` key) decides which classifier runs.
            NOTE(review): the embeddings stage is still fixed to
            ``sent_bert_wiki_books_sst2``, so any model name passed here is
            assumed to be compatible with those embeddings -- confirm before
            adding more options to the selectbox.

    Returns:
        A ``pyspark.ml.Pipeline`` containing the three stages above.
    """
    document = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    embeddings = (
        BertSentenceEmbeddings
        .pretrained('sent_bert_wiki_books_sst2', 'en')
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    # Load the classifier the caller asked for instead of a hard-coded name.
    sentimentClassifier = (
        ClassifierDLModel.pretrained(model, "en")
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("class_")
    )

    financial_sentiment_pipeline = Pipeline(
        stages=[
            document,
            embeddings,
            sentimentClassifier,
        ]
    )

    return financial_sentiment_pipeline
67
+
68
def fit_data(pipeline, data):
    """Run ``pipeline`` on ``data`` and return the first predicted label.

    The pipeline is fitted on a one-row DataFrame whose only "text" value is
    an empty string, wrapped in a ``LightPipeline``, and then applied to
    ``data`` with ``fullAnnotate``.

    Note: this function reads the module-level ``spark`` session, so that
    name must be assigned before the function is called.

    Args:
        pipeline: The pipeline to fit (as returned by ``create_pipeline``).
        data: The input passed to ``LightPipeline.fullAnnotate``.

    Returns:
        The ``result`` attribute of the first "class_" annotation of the
        first ``fullAnnotate`` entry.
    """
    placeholder_df = spark.createDataFrame([['']]).toDF('text')
    light_model = LightPipeline(pipeline.fit(placeholder_df))

    # fullAnnotate returns one entry per input; only the first one is used.
    annotations = light_model.fullAnnotate(data)[0]
    first_prediction = annotations['class_'][0]
    return first_prediction.result
75
+
76
+ # Set up the page layout
77
+ st.markdown('<div class="main-title">Sentiment Analysis of Financial News with Spark NLP</div>', unsafe_allow_html=True)
78
+
79
+ # Sidebar content
80
+ model = st.sidebar.selectbox(
81
+ "Choose the pretrained model",
82
+ ["classifierdl_bertwiki_finance_sentiment"],
83
+ help="For more info about the models visit: https://sparknlp.org/models"
84
+ )
85
+
86
+ # Reference notebook link in sidebar
87
+ colab_link = """
88
+ <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN_FINANCE.ipynb">
89
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
90
+ </a>
91
+ """
92
+ st.sidebar.markdown('Reference notebook:')
93
+ st.sidebar.markdown(colab_link, unsafe_allow_html=True)
94
+
95
+ # Load examples
96
+ examples = [
97
+ "In April 2005, Neste separated from its parent company, Finnish energy company Fortum, and became listed on the Helsinki Stock Exchange.",
98
+ "Finnish IT solutions provider Affecto Oyj HEL: AFE1V said today it slipped to a net loss of EUR 115,000 USD 152,000 in the second quarter of 2010 from a profit of EUR 845,000 in the corresponding period a year earlier.",
99
+ "10 February 2011 - Finnish media company Sanoma Oyj HEL: SAA1V said yesterday its 2010 net profit almost tripled to EUR297.3m from EUR107.1m for 2009 and announced a proposal for a raised payout.",
100
+ "Profit before taxes decreased by 9% to EUR 187.8 mn in the first nine months of 2008, compared to EUR 207.1 mn a year earlier.",
101
+ "The world's second largest stainless steel maker said net profit in the three-month period until Dec. 31 surged to euro603 million US$ 781 million, or euro3.33 US$ 4.31 per share, from euro172 million, or euro0.94 per share, the previous year.",
102
+ "TietoEnator signed an agreement to acquire Indian research and development (R&D) services provider and turnkey software solutions developer Fortuna Technologies Pvt. Ltd. for 21 mln euro ($30.3 mln) in September 2007."
103
+ ]
104
+
105
# User input selection: text typed by the user wins over the chosen sample.
sample_choice = st.selectbox("Select a sample", examples)
custom_input = st.text_input("Try it for yourself!")
selected_text = custom_input if custom_input else sample_choice

st.subheader('Selected Text')
st.write(selected_text)

# Initialize Spark, build the pipeline for the chosen model, and classify.
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)

# Pick the HTML for the verdict, then render it with a single markdown call.
normalized_label = output.lower()
if normalized_label in ('pos', 'positive'):
    verdict_html = """<h3>This seems like <span class="result-positive">positive</span> news. <span style="font-size:35px;">&#128515;</span></h3>"""
elif normalized_label in ('neg', 'negative'):
    verdict_html = """<h3>This seems like <span class="result-negative">negative</span> news. <span style="font-size:35px;">&#128544;</span></h3>"""
else:
    # Any other label is shown verbatim, styled as neutral.
    verdict_html = f"""<h3>This seems like <span class="result-neutral">{output}</span> news. <span style="font-size:35px;">&#128578;</span></h3>"""

st.markdown(verdict_html, unsafe_allow_html=True)
Dockerfile ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Download base image ubuntu 18.04
2
+ FROM ubuntu:18.04
3
+
4
+ # Set environment variables
5
+ ENV NB_USER jovyan
6
+ ENV NB_UID 1000
7
+ ENV HOME /home/${NB_USER}
8
+
9
+ # Install required packages
10
+ RUN apt-get update && apt-get install -y \
11
+ tar \
12
+ wget \
13
+ bash \
14
+ rsync \
15
+ gcc \
16
+ libfreetype6-dev \
17
+ libhdf5-serial-dev \
18
+ libpng-dev \
19
+ libzmq3-dev \
20
+ python3 \
21
+ python3-dev \
22
+ python3-pip \
23
+ unzip \
24
+ pkg-config \
25
+ software-properties-common \
26
+ graphviz \
27
+ openjdk-8-jdk \
28
+ ant \
29
+ ca-certificates-java \
30
+ && apt-get clean \
31
+ && update-ca-certificates -f;
32
+
33
+ # Install Python 3.8 and pip
34
+ RUN add-apt-repository ppa:deadsnakes/ppa \
35
+ && apt-get update \
36
+ && apt-get install -y python3.8 python3-pip \
37
+ && apt-get clean;
38
+
39
+ # Set up JAVA_HOME
40
+ ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
41
+ RUN mkdir -p ${HOME} \
42
+ && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
43
+ && chown -R ${NB_UID}:${NB_UID} ${HOME}
44
+
45
+ # Create a new user named "jovyan" with user ID 1000
46
+ RUN useradd -m -u ${NB_UID} ${NB_USER}
47
+
48
+ # Switch to the "jovyan" user
49
+ USER ${NB_USER}
50
+
51
+ # Set home and path variables for the user
52
+ ENV HOME=/home/${NB_USER} \
53
+ PATH=/home/${NB_USER}/.local/bin:$PATH
54
+
55
+ # Set the working directory to the user's home directory
56
+ WORKDIR ${HOME}
57
+
58
+ # Upgrade pip and install Python dependencies
59
+ RUN python3.8 -m pip install --upgrade pip
60
+ COPY requirements.txt /tmp/requirements.txt
61
+ RUN python3.8 -m pip install -r /tmp/requirements.txt
62
+
63
+ # Copy the application code into the container at /home/jovyan
64
+ COPY --chown=${NB_USER}:${NB_USER} . ${HOME}
65
+
66
+ # Expose port for Streamlit
67
+ EXPOSE 7860
68
+
69
+ # Define the entry point for the container
70
+ ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
images/financial-sentiment.png ADDED
pages/Workflow & Model Overview.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ # Custom CSS for better styling
4
+ st.markdown("""
5
+ <style>
6
+ .main-title {
7
+ font-size: 36px;
8
+ color: #4A90E2;
9
+ font-weight: bold;
10
+ text-align: center;
11
+ }
12
+ .sub-title {
13
+ font-size: 24px;
14
+ color: #4A90E2;
15
+ margin-top: 20px;
16
+ }
17
+ .section {
18
+ background-color: #f9f9f9;
19
+ padding: 15px;
20
+ border-radius: 10px;
21
+ margin-top: 20px;
22
+ }
23
+ .section h2 {
24
+ font-size: 22px;
25
+ color: #4A90E2;
26
+ }
27
+ .section p, .section ul {
28
+ color: #666666;
29
+ }
30
+ .link {
31
+ color: #4A90E2;
32
+ text-decoration: none;
33
+ }
34
+ </style>
35
+ """, unsafe_allow_html=True)
36
+
37
+ # Introduction
38
+ st.markdown('<div class="main-title">Financial Sentiment Analysis with Spark NLP</div>', unsafe_allow_html=True)
39
+
40
+ st.markdown("""
41
+ <div class="section">
42
+ <p>Welcome to the Spark NLP Financial Sentiment Analysis Demo App! Financial sentiment analysis is the process of identifying and categorizing the emotional tone of financial news articles, reports, tweets, and other textual data related to finance. Using Spark NLP, this app demonstrates how to accurately analyze the sentiment of financial texts.</p>
43
+ <p>This app leverages Spark NLP's advanced models to detect sentiments in financial texts, helping users gain insights into market sentiment and make informed decisions.</p>
44
+ </div>
45
+ """, unsafe_allow_html=True)
46
+
47
+ st.image('images/financial-sentiment.png', use_column_width='auto')
48
+
49
+ # About Financial Sentiment Analysis
50
+ st.markdown('<div class="sub-title">About Financial Sentiment Analysis</div>', unsafe_allow_html=True)
51
+ st.markdown("""
52
+ <div class="section">
53
+ <p>Financial sentiment analysis involves analyzing texts to determine whether the expressed sentiment is positive, negative, or neutral with respect to financial markets. It is widely used by investors, traders, and analysts to gauge market sentiment and predict market movements.</p>
54
+ <p>Applications of financial sentiment analysis include analyzing news articles, earnings reports, social media posts, and more to identify trends and make data-driven investment decisions.</p>
55
+ </div>
56
+ """, unsafe_allow_html=True)
57
+
58
+ # Using Sentiment Analysis in Spark NLP
59
+ st.markdown('<div class="sub-title">Using Sentiment Analysis in Spark NLP</div>', unsafe_allow_html=True)
60
+ st.markdown("""
61
+ <div class="section">
62
+ <p>The following pipeline demonstrates how to use Spark NLP for financial sentiment analysis:</p>
63
+ </div>
64
+ """, unsafe_allow_html=True)
65
+
66
+ # Setup Instructions
67
+ st.markdown('<div class="sub-title">Setup</div>', unsafe_allow_html=True)
68
+ st.markdown('<p>To install Spark NLP in Python, use your favorite package manager (conda, pip, etc.). For example:</p>', unsafe_allow_html=True)
69
+ st.code("""
70
+ pip install spark-nlp
71
+ pip install pyspark
72
+ """, language="bash")
73
+
74
+ st.markdown("<p>Then, import Spark NLP and start a Spark session:</p>", unsafe_allow_html=True)
75
+ st.code("""
76
+ import sparknlp
77
+
78
+ # Start Spark Session
79
+ spark = sparknlp.start()
80
+ """, language='python')
81
+
82
+ # Example Pipeline for Financial Sentiment Analysis
83
+ st.markdown('<div class="sub-title">Example Usage: Financial Sentiment Analysis with Spark NLP</div>', unsafe_allow_html=True)
84
+ st.code('''
85
+ from sparknlp.base import DocumentAssembler
86
+ from sparknlp.annotator import BertSentenceEmbeddings, ClassifierDLModel
87
+ from pyspark.ml import Pipeline
88
+
89
+ # Step 1: Document Assembler
90
+ document = DocumentAssembler()\\
91
+ .setInputCol("text")\\
92
+ .setOutputCol("document")
93
+
94
+ # Step 2: Sentence Embeddings
95
+ embeddings = BertSentenceEmbeddings\\
96
+ .pretrained('sent_bert_wiki_books_sst2', 'en') \\
97
+ .setInputCols(["document"])\\
98
+ .setOutputCol("sentence_embeddings")
99
+
100
+ # Step 3: Sentiment Classifier
101
+ sentimentClassifier = ClassifierDLModel.pretrained("classifierdl_bertwiki_finance_sentiment", "en") \\
102
+ .setInputCols(["sentence_embeddings"]) \\
103
+ .setOutputCol("class_")
104
+
105
+ # Define the pipeline
106
+ financial_sentiment_pipeline = Pipeline(
107
+ stages=[document,
108
+ embeddings,
109
+ sentimentClassifier])
110
+
111
+ # Sample Data
112
+ data = spark.createDataFrame(
113
+ [["The company reported a significant increase in revenue for the last quarter."]],
114
+ ["text"]
115
+ )
116
+
117
+ # Fit-transform to get predictions
118
+ result = financial_sentiment_pipeline.fit(data).transform(data)
119
+ result.select("text","class_.result").show(truncate=False)
120
+ ''', language='python')
121
+
122
+ st.text("""
123
+ +----------------------------------------------------------------------------+----------+
124
+ |text |result |
125
+ +----------------------------------------------------------------------------+----------+
126
+ |The company reported a significant increase in revenue for the last quarter.|[positive]|
127
+ +----------------------------------------------------------------------------+----------+
128
+ """)
129
+
130
+ st.markdown("""
131
+ <p>The code snippet demonstrates how to set up a pipeline in Spark NLP to perform financial sentiment analysis on text data using pre-trained models. The resulting DataFrame contains the sentiment predictions.</p>
132
+ """, unsafe_allow_html=True)
133
+
134
+ # Conclusion
135
+ st.markdown("""
136
+ <div class="section">
137
+ <h2>Conclusion</h2>
138
+ <p>In this app, we demonstrated how to use Spark NLP's pre-trained models to perform financial sentiment analysis on text data. By integrating these models into your NLP pipelines, you can gain valuable insights into market sentiment and make informed financial decisions.</p>
139
+ </div>
140
+ """, unsafe_allow_html=True)
141
+
142
+ # References and Additional Information
143
+ st.markdown('<div class="sub-title">For additional information, please check the following references.</div>', unsafe_allow_html=True)
144
+
145
+ st.markdown("""
146
+ <div class="section">
147
+ <ul>
148
+ <li>Documentation :&nbsp;<a class="link" href="https://nlp.johnsnowlabs.com/docs/en/transformers#sentiment" target="_blank" rel="noopener">Financial Sentiment Analysis</a></li>
149
+ <li>Python Docs :&nbsp;<a class="link" href="https://nlp.johnsnowlabs.com/api/python/reference/autosummary/sparknlp/annotator/classifierdl/index.html#sparknlp.annotator.ClassifierDLModel" target="_blank" rel="noopener">ClassifierDLModel</a></li>
150
+ <li>Scala Docs :&nbsp;<a class="link" href="https://nlp.johnsnowlabs.com/api/com/johnsnowlabs/nlp/annotators/classifierdl/index.html" target="_blank" rel="noopener">ClassifierDLModel</a></li>
151
+ <li>Example Notebook :&nbsp;<a class="link" href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN_FINANCE.ipynb" target="_blank" rel="noopener">Financial Sentiment Analysis</a></li>
152
+ <li>Reference Article :&nbsp;<a class="link" href="https://medium.com/spark-nlp/financial-sentiment-analysis-using-sparknlp-achieving-95-accuracy-e2df27744617" target="_blank" rel="noopener">Financial Sentiment Analysis Using SparkNLP Achieving 95% Accuracy</a></li>
153
+ </ul>
154
+ </div>
155
+ """, unsafe_allow_html=True)
156
+
157
+ st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
158
+ st.markdown("""
159
+ <div class="section">
160
+ <ul>
161
+ <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Documentation and examples</li>
162
+ <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Live discussion with the community and team</li>
163
+ <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Bug reports, feature requests, and contributions</li>
164
+ <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Spark NLP articles</li>
165
+ <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Video tutorials</li>
166
+ </ul>
167
+ </div>
168
+ """, unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ numpy
4
+ spark-nlp
5
+ pyspark