"""Streamlit demo: sentiment analysis of financial news with Spark NLP."""

import streamlit as st
import sparknlp
import os
import pandas as pd

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

# Page configuration
st.set_page_config(
    layout="wide",
    page_title="Spark NLP Financial Sentiment Analysis",
    initial_sidebar_state="expanded",
)

# CSS for styling
st.markdown(""" """, unsafe_allow_html=True)


@st.cache_resource
def init_spark():
    """Start Spark NLP and return the session.

    Wrapped in ``st.cache_resource`` so Streamlit reruns reuse the result
    instead of calling ``sparknlp.start()`` again.
    """
    return sparknlp.start()


@st.cache_resource
def create_pipeline(model):
    """Build the (unfitted) financial sentiment pipeline.

    Stages: ``DocumentAssembler`` -> ``BertSentenceEmbeddings`` ->
    ``ClassifierDLModel``.

    Args:
        model: Name of the pretrained ``ClassifierDLModel`` to load (the
            value chosen in the sidebar).

    Returns:
        A ``pyspark.ml.Pipeline`` whose final stage writes to the
        ``class_`` output column.
    """
    document = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("document")
    )

    embeddings = (
        BertSentenceEmbeddings.pretrained('sent_bert_wiki_books_sst2', 'en')
        .setInputCols(["document"])
        .setOutputCol("sentence_embeddings")
    )

    # Honour the requested model name rather than a hard-coded one, so the
    # sidebar selection actually decides which classifier is loaded.
    sentimentClassifier = (
        ClassifierDLModel.pretrained(model, "en")
        .setInputCols(["sentence_embeddings"])
        .setOutputCol("class_")
    )

    financial_sentiment_pipeline = Pipeline(
        stages=[document, embeddings, sentimentClassifier]
    )
    return financial_sentiment_pipeline


def fit_data(pipeline, data):
    """Classify ``data`` and return the predicted label.

    The pipeline is fitted on a one-row DataFrame holding an empty string,
    wrapped in a ``LightPipeline``, and run on ``data`` with
    ``fullAnnotate``.

    Args:
        pipeline: Pipeline as returned by ``create_pipeline``.
        data: Text to classify.

    Returns:
        The ``result`` of the first ``class_`` annotation.
    """
    # Use the cached session directly instead of relying on a module-level
    # ``spark`` name having been assigned before this function is called.
    session = init_spark()
    empty_df = session.createDataFrame([['']]).toDF('text')
    pipeline_model = pipeline.fit(empty_df)
    light_model = LightPipeline(pipeline_model)
    results = light_model.fullAnnotate(data)[0]
    return results['class_'][0].result


# Set up the page layout
st.markdown(
    """
Sentiment Analysis of Financial News with Spark NLP
""",
    unsafe_allow_html=True,
)

# Sidebar content
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    ["classifierdl_bertwiki_finance_sentiment"],
    help="For more info about the models visit: https://sparknlp.org/models",
)

# Reference notebook link in sidebar
colab_link = """ Open In Colab """
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(colab_link, unsafe_allow_html=True)

# Load examples
examples = [
    "In April 2005, Neste separated from its parent company, Finnish energy company Fortum, and became listed on the Helsinki Stock Exchange.",
    "Finnish IT solutions provider Affecto Oyj HEL: AFE1V said today it slipped to a net loss of EUR 115,000 USD 152,000 in the second quarter of 2010 from a profit of EUR 845,000 in the corresponding period a year earlier.",
    "10 February 2011 - Finnish media company Sanoma Oyj HEL: SAA1V said yesterday its 2010 net profit almost tripled to EUR297.3m from EUR107.1m for 2009 and announced a proposal for a raised payout.",
    "Profit before taxes decreased by 9% to EUR 187.8 mn in the first nine months of 2008, compared to EUR 207.1 mn a year earlier.",
    "The world's second largest stainless steel maker said net profit in the three-month period until Dec. 31 surged to euro603 million US$ 781 million, or euro3.33 US$ 4.31 per share, from euro172 million, or euro0.94 per share, the previous year.",
    "TietoEnator signed an agreement to acquire Indian research and development (R&D) services provider and turnkey software solutions developer Fortuna Technologies Pvt. Ltd. for 21 mln euro ($30.3 mln) in September 2007.",
]

# User input selection
selected_text = st.selectbox("Select a sample", examples)
custom_input = st.text_input("Try it for yourself!")

# Use custom input if provided; whitespace-only input carries no text to
# classify, so it does not override the selected sample.
if custom_input.strip():
    selected_text = custom_input

st.subheader('Selected Text')
st.write(selected_text)

# Initialize Spark and create pipeline
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_text)

# Display output sentiment
label = output.lower()
if label in ['pos', 'positive']:
    st.markdown("""

This seems like positive news. 😃

""", unsafe_allow_html=True)
elif label in ['neg', 'negative']:
    st.markdown("""

This seems like negative news. 😠

""", unsafe_allow_html=True)
else:
    st.markdown(f"""

This seems like {output} news. 🙂

""", unsafe_allow_html=True)