File size: 5,032 Bytes
1611e10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import streamlit as st
import sparknlp
import os
import pandas as pd

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

# Page-level settings: browser tab title, wide layout, sidebar open on load.
st.set_page_config(
    page_title="Spark NLP Financial Sentiment Analysis",
    initial_sidebar_state="expanded",
    layout="wide",
)

# Stylesheet injected into the page. It defines the classes used further down:
# ``main-title`` for the heading and ``result-positive`` / ``result-negative`` /
# ``result-neutral`` for the colour of the predicted sentiment label.
_PAGE_CSS = """

    <style>

        .main-title {

            font-size: 36px;

            color: #4A90E2;

            font-weight: bold;

            text-align: center;

        }

        .section p, .section ul {

            color: #666666;

        }

        .result-positive {

            color: green;

        }

        .result-negative {

            color: red;

        }

        .result-neutral {

            color: #209DDC;

        }

    </style>

"""

# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    """Start Spark NLP and return the object produced by ``sparknlp.start()``.

    The function is wrapped in ``st.cache_resource`` so the result can be
    reused by Streamlit instead of being started again on each script run.
    """
    session = sparknlp.start()
    return session

@st.cache_resource
def create_pipeline(model):
    """Build the unfitted Spark ML pipeline used for sentiment classification.

    Stages, in order:
      1. ``DocumentAssembler``: column ``text`` -> ``document``
      2. ``BertSentenceEmbeddings`` (``sent_bert_wiki_books_sst2``, English):
         ``document`` -> ``sentence_embeddings``
      3. ``ClassifierDLModel`` (``model``, English):
         ``sentence_embeddings`` -> ``class_``

    Args:
        model: Name of the pretrained English ClassifierDL model to load,
            e.g. ``"classifierdl_bertwiki_finance_sentiment"``. It is also
            part of the ``st.cache_resource`` key, so each distinct name gets
            its own cached pipeline.
            NOTE(review): the classifier presumably has to be one trained on
            ``sent_bert_wiki_books_sst2`` embeddings -- confirm before adding
            other model names to the sidebar.

    Returns:
        A ``pyspark.ml.Pipeline`` holding the three stages above.
    """
    document = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    embeddings = (
        BertSentenceEmbeddings
        .pretrained('sent_bert_wiki_books_sst2', 'en')
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    # Load the classifier the caller asked for. Previously the name was
    # hard-coded here and the ``model`` argument was silently ignored.
    sentiment_classifier = (
        ClassifierDLModel
        .pretrained(model, "en")
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("class_")
    )

    financial_sentiment_pipeline = Pipeline(
        stages=[
            document,
            embeddings,
            sentiment_classifier,
        ]
    )

    return financial_sentiment_pipeline

def fit_data(pipeline, data):
    """Fit ``pipeline`` and return the predicted class label for ``data``.

    The pipeline is fitted on a one-row DataFrame whose ``text`` column holds
    an empty string, wrapped in a ``LightPipeline``, and run on ``data`` with
    ``fullAnnotate``. Relies on the module-level ``spark`` session being
    defined before the call.

    Args:
        pipeline: Unfitted pipeline whose output includes a ``class_`` column.
        data: Text to classify.

    Returns:
        The ``result`` attribute of the first ``class_`` annotation of the
        first ``fullAnnotate`` result.
    """
    placeholder_df = spark.createDataFrame([['']]).toDF('text')
    light_model = LightPipeline(pipeline.fit(placeholder_df))

    # fullAnnotate returns a list; only its first entry is used.
    first_annotation = light_model.fullAnnotate(data)[0]
    predictions = first_annotation['class_']

    return predictions[0].result

# Page heading, styled by the ``main-title`` CSS class.
st.markdown('<div class="main-title">Sentiment Analysis of Financial News with Spark NLP</div>', unsafe_allow_html=True)

# Sidebar: model picker (a single option at present).
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ["classifierdl_bertwiki_finance_sentiment"],
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Sidebar: badge linking to the reference Colab notebook.
colab_link = """

<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/SENTIMENT_EN_FINANCE.ipynb">

    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>

</a>

"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(colab_link, unsafe_allow_html=True)

# Sample sentences offered in the drop-down.
examples = [
    "In April 2005, Neste separated from its parent company, Finnish energy company Fortum, and became listed on the Helsinki Stock Exchange.",
    "Finnish IT solutions provider Affecto Oyj HEL: AFE1V said today it slipped to a net loss of EUR 115,000 USD 152,000 in the second quarter of 2010 from a profit of EUR 845,000 in the corresponding period a year earlier.",
    "10 February 2011 - Finnish media company Sanoma Oyj HEL: SAA1V said yesterday its 2010 net profit almost tripled to EUR297.3m from EUR107.1m for 2009 and announced a proposal for a raised payout.",
    "Profit before taxes decreased by 9% to EUR 187.8 mn in the first nine months of 2008, compared to EUR 207.1 mn a year earlier.",
    "The world's second largest stainless steel maker said net profit in the three-month period until Dec. 31 surged to euro603 million US$ 781 million, or euro3.33 US$ 4.31 per share, from euro172 million, or euro0.94 per share, the previous year.",
    "TietoEnator signed an agreement to acquire Indian research and development (R&D) services provider and turnkey software solutions developer Fortuna Technologies Pvt. Ltd. for 21 mln euro ($30.3 mln) in September 2007."
]

# User input: pick a sample or type free text.
selected_text = st.selectbox("Select a sample", examples)
custom_input = st.text_input("Try it for yourself!")

# Free text takes precedence over the sample. Whitespace-only input is
# treated as "nothing typed" so a blank string is never sent to the model.
custom_text = custom_input.strip()
if custom_text:
    selected_text = custom_text

st.subheader('Selected Text')
st.write(selected_text)

# ``spark`` must be bound at module level before fit_data() runs, because
# fit_data() reads it as a global.
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)

# Show the predicted sentiment. Labels outside the known positive/negative
# spellings fall through to the last branch and are displayed verbatim.
sentiment = output.lower()
if sentiment in ('pos', 'positive'):
    st.markdown("""<h3>This seems like <span class="result-positive">positive</span> news. <span style="font-size:35px;">&#128515;</span></h3>""", unsafe_allow_html=True)
elif sentiment in ('neg', 'negative'):
    st.markdown("""<h3>This seems like <span class="result-negative">negative</span> news. <span style="font-size:35px;">&#128544;</span></h3>""", unsafe_allow_html=True)
else:
    st.markdown(f"""<h3>This seems like <span class="result-neutral">{output}</span> news. <span style="font-size:35px;">&#128578;</span></h3>""", unsafe_allow_html=True)