File size: 4,213 Bytes
d7e89a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import sparknlp

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Streamlit page configuration: full-width layout, sidebar state decided automatically.
st.set_page_config(layout="wide", initial_sidebar_state="auto")

# Page-wide CSS: main title styling, grey info sections, and the scrollable
# boxes used to show the input and predicted sentences.
_PAGE_CSS = """

    <style>

        .main-title {

            font-size: 36px;

            color: #4A90E2;

            font-weight: bold;

            text-align: center;

        }

        .section {

            background-color: #f9f9f9;

            padding: 10px;

            border-radius: 10px;

            margin-top: 10px;

        }

        .section p, .section ul {

            color: #666666;

        }

        .scroll {

            overflow-x: auto;

            border: 1px solid #e6e9ef;

            border-radius: 0.25rem;

            padding: 1rem;

            margin-bottom: 2.5rem;

            white-space: pre-wrap;

        }

    </style>

"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    """Start (or reuse) the Spark NLP session; cached across Streamlit reruns."""
    session = sparknlp.start()
    return session

@st.cache_resource
def create_pipeline(model, task):
    """Build a two-stage Spark ML pipeline: document assembler -> pretrained T5.

    ``model`` is the pretrained T5 model name; ``task`` is the task prefix
    string fed to the transformer. Output annotations land in "transfers".
    """
    assembler = (
        DocumentAssembler()
        .setInputCol("text")
        .setOutputCol("documents")
    )
    transformer = (
        T5Transformer.pretrained(model)
        .setTask(task)
        .setInputCols(["documents"])
        .setMaxOutputLength(200)
        .setOutputCol("transfers")
    )
    return Pipeline().setStages([assembler, transformer])

def fit_data(pipeline, data):
    """Run *pipeline* over a one-row DataFrame built from the string *data*.

    Relies on the module-level ``spark`` session created further down the
    script. Returns the collected rows of the "transfers.result" column.
    """
    input_df = spark.createDataFrame([[data]]).toDF("text")
    transformed = pipeline.fit(input_df).transform(input_df)
    return transformed.select('transfers.result').collect()

# Sidebar: model picker plus a link to the reference Colab notebook.
_MODEL_OPTIONS = ['t5_active_to_passive_styletransfer', 't5_passive_to_active_styletransfer']
model = st.sidebar.selectbox(
    "Choose the Pretrained Model",
    _MODEL_OPTIONS,
    help="Select the model you want to use for style transfer.",
)

st.sidebar.markdown('Reference notebook:')
_COLAB_BADGE = """

    <a href="https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/tutorials/streamlit_notebooks/T5_LINGUISTIC.ipynb">

        <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>

    </a>

    """
st.sidebar.markdown(_COLAB_BADGE, unsafe_allow_html=True)

# Canned example sentences per model; keys match the selectbox option names.
examples = dict(
    t5_active_to_passive_styletransfer=[
        "I am writing you a letter.",
        "Reporters write news reports.",
        "The company will hire new workers.",
        "Emma writes a letter.",
        "We did not grow rice.",
        "People will admire him.",
        "Someone has stolen my purse.",
    ],
    t5_passive_to_active_styletransfer=[
        "At dinner, six shrimp were eaten by Harry.",
        "The savannah is roamed by beautiful giraffes.",
        "The flat tire was changed by Sue.",
        "The students' questions are always answered by the teacher.",
    ],
)

# Task prefix handed to T5Transformer.setTask; keys match the selectbox options.
task_descriptions = dict(
    t5_active_to_passive_styletransfer="Transfer Active to Passive:",
    t5_passive_to_active_styletransfer="Transfer Passive to Active:",
)

# Page header.
main_heading = "Switch Between Active and Passive Voice"
tagline = "Effortlessly Transform Sentences and Explore Different Writing Styles"
st.markdown(f'<div class="main-title">{main_heading}</div>', unsafe_allow_html=True)
st.markdown(f'<div style="text-align: center; color: #666666;">{tagline}</div>', unsafe_allow_html=True)

# Input widgets: a canned example plus an optional free-form sentence.
example_sentence = st.selectbox("Select an example", examples[model])
user_sentence = st.text_input("Try it with your own sentence!")

# A non-empty custom sentence wins over the chosen example.
text_to_analyze = user_sentence or example_sentence

st.write('Text to analyze:')
st.markdown(f'<div class="scroll">{text_to_analyze}</div>', unsafe_allow_html=True)

# Initialize Spark, build the pipeline for the chosen model/task, and run it
# on the selected text.
spark = init_spark()
pipeline = create_pipeline(model, task_descriptions[model])
output = fit_data(pipeline, text_to_analyze)

# Display the transformed sentence.
st.write("Predicted Sentence:")
# Collected rows come back as [[<list of result strings>]]; join the fragments.
output_text = "".join(output[0][0])
# Capitalize only the first character. The previous str.title() call
# upper-cased every word and corrupted apostrophes ("don't" -> "Don'T"),
# mangling the model's output.
display_text = output_text[:1].upper() + output_text[1:]
st.markdown(f'<div class="scroll">{display_text}</div>', unsafe_allow_html=True)