File size: 4,213 Bytes
d7e89a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import html

import streamlit as st
import sparknlp
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
# Page configuration: wide layout, sidebar state left on "auto".
st.set_page_config(layout="wide", initial_sidebar_state="auto")
# CSS for styling: defines the .main-title, .section and .scroll classes.
# The stylesheet is injected once as raw HTML so later st.markdown() calls
# in this script can reference these classes.
PAGE_CSS = """
<style>
.main-title {
font-size: 36px;
color: #4A90E2;
font-weight: bold;
text-align: center;
}
.section {
background-color: #f9f9f9;
padding: 10px;
border-radius: 10px;
margin-top: 10px;
}
.section p, .section ul {
color: #666666;
}
.scroll {
overflow-x: auto;
border: 1px solid #e6e9ef;
border-radius: 0.25rem;
padding: 1rem;
margin-bottom: 2.5rem;
white-space: pre-wrap;
}
</style>
"""
st.markdown(PAGE_CSS, unsafe_allow_html=True)
@st.cache_resource
def init_spark():
    """Start Spark NLP via ``sparknlp.start()`` and return the result.

    The function is wrapped in ``st.cache_resource``, so the started
    session is meant to be shared across Streamlit reruns rather than
    re-created on every script execution.
    """
    session = sparknlp.start()
    return session
@st.cache_resource
def create_pipeline(model, task):
    """Build the two-stage style-transfer pipeline for ``model``.

    Args:
        model: Name of the pretrained T5 model handed to
            ``T5Transformer.pretrained``.
        task: Task string handed to ``T5Transformer.setTask``.

    Returns:
        An unfitted ``Pipeline`` whose stages are a ``DocumentAssembler``
        (column ``text`` -> ``documents``) followed by the T5 transformer
        (column ``documents`` -> ``transfers``, output capped at 200).
    """
    document_stage = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )
    transfer_stage = (
        T5Transformer.pretrained(model)
        .setTask(task)
        .setInputCols(["documents"])
        .setMaxOutputLength(200)
        .setOutputCol("transfers")
    )
    return Pipeline().setStages([document_stage, transfer_stage])
def fit_data(pipeline, data):
    """Run a single text through ``pipeline`` and collect the output.

    Note: this reads the module-level ``spark`` session, so that global
    must already be assigned when the function is called.

    Args:
        pipeline: The pipeline to fit and apply.
        data: The text placed in the single-row, single-column ``text``
            DataFrame fed to the pipeline.

    Returns:
        The collected rows of the ``transfers.result`` column.
    """
    frame = spark.createDataFrame([[data]]).toDF("text")
    fitted = pipeline.fit(frame)
    transformed = fitted.transform(frame)
    return transformed.select('transfers.result').collect()
# Sidebar setup: the model picker drives which examples and task string
# the rest of the script uses.
MODEL_CHOICES = [
    't5_active_to_passive_styletransfer',
    't5_passive_to_active_styletransfer',
]
model = st.sidebar.selectbox(
    "Choose the Pretrained Model",
    MODEL_CHOICES,
    help="Select the model you want to use for style transfer.",
)

# Reference notebook link in sidebar (Colab badge rendered as raw HTML).
COLAB_BADGE_HTML = """
<a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb">
<img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
"""
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown(COLAB_BADGE_HTML, unsafe_allow_html=True)
# Sample sentences per model; the script looks these up by the selected
# model name to populate the example picker.
_ACTIVE_VOICE_SAMPLES = [
    "I am writing you a letter.",
    "Reporters write news reports.",
    "The company will hire new workers.",
    "Emma writes a letter.",
    "We did not grow rice.",
    "People will admire him.",
    "Someone has stolen my purse.",
]
_PASSIVE_VOICE_SAMPLES = [
    "At dinner, six shrimp were eaten by Harry.",
    "The savannah is roamed by beautiful giraffes.",
    "The flat tire was changed by Sue.",
    "The students' questions are always answered by the teacher.",
]
examples = {
    "t5_active_to_passive_styletransfer": _ACTIVE_VOICE_SAMPLES,
    "t5_passive_to_active_styletransfer": _PASSIVE_VOICE_SAMPLES,
}

# Task string per model; the script passes the selected entry to
# create_pipeline() as its `task` argument.
task_descriptions = {
    "t5_active_to_passive_styletransfer": "Transfer Active to Passive:",
    "t5_passive_to_active_styletransfer": "Transfer Passive to Active:",
}
# Set up the page layout: a title banner styled by the .main-title class
# and a centered grey subtitle, both rendered as raw HTML.
title = "Switch Between Active and Passive Voice"
sub_title = "Effortlessly Transform Sentences and Explore Different Writing Styles"
title_html = f'<div class="main-title">{title}</div>'
sub_title_html = f'<div style="text-align: center; color: #666666;">{sub_title}</div>'
st.markdown(title_html, unsafe_allow_html=True)
st.markdown(sub_title_html, unsafe_allow_html=True)
# Text selection and analysis: the user either picks one of the canned
# examples for the selected model or types a sentence of their own.
selected_text = st.selectbox("Select an example", examples[model])
custom_input = st.text_input("Try it with your own sentence!")
# Prefer the custom sentence, but fall back to the example when the box is
# empty or contains only whitespace, so a blank string is never analyzed.
text_to_analyze = custom_input if (custom_input or "").strip() else selected_text
st.write('Text to analyze:')
# The text is user-controlled and is rendered with unsafe_allow_html=True,
# so it must be HTML-escaped to prevent markup/script injection.
st.markdown(
    f'<div class="scroll">{html.escape(text_to_analyze)}</div>',
    unsafe_allow_html=True,
)
# Initialize Spark and create pipeline. `spark` must be bound at module
# level before fit_data() runs, because fit_data() reads it as a global.
spark = init_spark()
pipeline = create_pipeline(model, task_descriptions[model])
output = fit_data(pipeline, text_to_analyze)
# Display transformed sentence
st.write("Predicted Sentence:")
# output[0][0] is the first column of the first collected row; its items
# are concatenated into one string.
output_text = "".join(output[0][0])
# The model output derives from user-supplied text and is rendered with
# unsafe_allow_html=True, so escape it. Escaping happens after .title()
# so that title-casing cannot corrupt the generated HTML entities.
st.markdown(
    f'<div class="scroll">{html.escape(output_text.title())}</div>',
    unsafe_allow_html=True,
)
|