import streamlit as st
import sparknlp
import os
import pandas as pd
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from sparknlp.pretrained import PretrainedPipeline

# Page configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# CSS for styling (the stylesheet contents were elided in the source)
st.markdown("""
""", unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    """Start the Spark NLP session; cached so reruns reuse the same session."""
    return sparknlp.start()

@st.cache_resource
def create_pipeline(model):
    """Build the Urdu sentiment pipeline. The `model` argument is unused here;
    the pretrained model names are hard-coded below."""
    document_assembler = DocumentAssembler() \
        .setInputCol("text") \
        .setOutputCol("document")

    sentence_detector = SentenceDetector() \
        .setInputCols(["document"]) \
        .setOutputCol("sentence")

    tokenizer = Tokenizer() \
        .setInputCols(["sentence"]) \
        .setOutputCol("token")

    # Pretrained 300-dimensional Urdu word embeddings
    word_embeddings = WordEmbeddingsModel.pretrained('urduvec_140M_300d', 'ur') \
        .setInputCols(["sentence", "token"]) \
        .setOutputCol("word_embeddings")

    # Average the word vectors into a single embedding per sentence
    sentence_embeddings = SentenceEmbeddings() \
        .setInputCols(["sentence", "word_embeddings"]) \
        .setOutputCol("sentence_embeddings") \
        .setPoolingStrategy("AVERAGE")

    # Sentiment classifier trained on Urdu IMDB reviews
    classifier = SentimentDLModel.pretrained('sentimentdl_urduvec_imdb', 'ur') \
        .setInputCols(['sentence_embeddings']) \
        .setOutputCol('sentiment')

    nlp_pipeline = Pipeline(stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        word_embeddings,
        sentence_embeddings,
        classifier
    ])

    return nlp_pipeline

def fit_data(pipeline, data):
    """Fit the pipeline on an empty DataFrame, then annotate `data`
    with a LightPipeline and return the predicted sentiment label."""
    spark = init_spark()  # cached, so this reuses the existing session
    empty_df = spark.createDataFrame([['']]).toDF('text')
    pipeline_model = pipeline.fit(empty_df)
    model = LightPipeline(pipeline_model)
    results = model.fullAnnotate(data)[0]
    return results['sentiment'][0].result

# Set up the page layout (the source is truncated here, mid-call)
st.markdown('
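
# ----------------------------------------------------------------------
# A minimal sketch (not from the original script, which is truncated at
# the st.markdown(' call above) of how these helpers are typically wired
# into the Streamlit page. The widget labels and control flow below are
# assumptions for illustration, kept commented out so they do not clash
# with the incomplete statement above.
# ----------------------------------------------------------------------
# pipeline = create_pipeline('sentimentdl_urduvec_imdb')
# text = st.text_area('Enter Urdu text to analyze:')
# if text:
#     sentiment = fit_data(pipeline, text)
#     st.success(f'Predicted sentiment: {sentiment}')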