|
import streamlit as st |
|
from transformers import PegasusForConditionalGeneration, PegasusTokenizer |
|
|
|
st.title("Paraphrase sentences")

# Checkpoint identifier passed to ``from_pretrained`` for the tokenizer and
# the model.
model_name = "tuner007/pegasus_paraphrase"
# Device that the encoded inputs and the model are moved to.
torch_device = "cpu"


@st.cache(allow_output_mutation=True)
def _load_tokenizer():
    """Load the Pegasus tokenizer once and reuse it on later script runs.

    Streamlit re-executes this script on each widget interaction, so an
    uncached ``from_pretrained`` call would rebuild the tokenizer every time.
    ``allow_output_mutation=True`` keeps Streamlit from hashing the returned
    object on every cache hit.
    """
    return PegasusTokenizer.from_pretrained(model_name)


tokenizer = _load_tokenizer()
|
|
|
|
|
@st.cache(allow_output_mutation=True)
def load_model():
    """Return the Pegasus model for ``model_name`` placed on ``torch_device``.

    The function is wrapped in ``st.cache`` so repeated calls reuse the
    already-loaded model instead of loading the checkpoint again.
    """
    return PegasusForConditionalGeneration.from_pretrained(model_name).to(
        torch_device
    )
|
|
|
|
|
def get_response(
    input_text, num_return_sequences, num_beams, max_length=60, temperature=1.5
):
    """Generate paraphrases of ``input_text`` with the cached Pegasus model.

    Args:
        input_text: Sentence to paraphrase.
        num_return_sequences: Number of paraphrases to return.
        num_beams: Requested beam width. It is raised to
            ``num_return_sequences`` when smaller, because beam search cannot
            return more sequences than it has beams.
        max_length: Token limit used both to truncate the encoded input and
            to cap the length of each generated paraphrase.
        temperature: Forwarded unchanged to ``model.generate``.

    Returns:
        The decoded paraphrases as a list of strings, special tokens removed.
    """
    model = load_model()

    # Widen the beam rather than letting ``generate`` reject a request for
    # more sequences than beams.
    effective_beams = max(num_beams, num_return_sequences)

    encoded = tokenizer(
        [input_text],
        truncation=True,
        padding="longest",
        max_length=max_length,
        return_tensors="pt",
    ).to(torch_device)

    # NOTE(review): no sampling flag is passed, so ``temperature`` may have no
    # effect under plain beam search -- confirm against the installed
    # transformers version before relying on it.
    generated = model.generate(
        **encoded,
        max_length=max_length,
        num_beams=effective_beams,
        num_return_sequences=num_return_sequences,
        temperature=temperature,
    )
    return tokenizer.batch_decode(generated, skip_special_tokens=True)
|
|
|
|
|
# Beam width sent with every request; equal to the upper bound of the
# "Number of paraphrases" slider below.
num_beams = 10
num_return_sequences = st.slider("Number of paraphrases", 1, 10, 5, 1)
context = st.text_area(label="Enter a sentence to paraphrase", max_chars=384)

with st.expander("Advanced"):
    temperature = st.slider("Temperature", 0.1, 5.0, 1.5, 0.1)
    max_length = st.slider("Max length", 10, 100, 60, 10)

# Treat whitespace-only input as empty so no generation is run for a blank
# sentence.
sentence = context.strip()
if sentence:
    response = get_response(
        sentence, num_return_sequences, num_beams, max_length, temperature
    )

    for paraphrase in response:
        st.write(paraphrase)