# Hugging Face Spaces app: text paraphrasing with the Pegasus model.
# (Removed scraped page residue — "Spaces: / Sleeping / Sleeping" — which is
# not Python and would prevent the file from parsing.)
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import gradio as gr

# Checkpoint fine-tuned for sentence-level paraphrasing.
model_name = 'tuner007/pegasus_paraphrase'
# Prefer GPU when available; CPU works but generation is slower.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the tokenizer and model once at import time so every request reuses
# the same instances instead of re-downloading/re-loading per call.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
def get_response(input_text, num_return_sequences=1, num_beams=3):
    """
    Generate paraphrased text for a given input_text using the Pegasus model.

    Args:
        input_text (str): The text to paraphrase.
        num_return_sequences (int): Number of paraphrased sequences to return.
            Must be <= num_beams (a transformers requirement for beam search).
        num_beams (int): Number of beams for beam search.

    Returns:
        list: A list of num_return_sequences paraphrased text strings.
    """
    # Tokenize the input; inputs beyond 60 tokens are truncated, matching the
    # checkpoint's training configuration.
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(torch_device)
    # Beam-search decode. NOTE: the original passed temperature=0.7, which has
    # no effect (and triggers a transformers warning) when do_sample is False,
    # the default — so it is intentionally dropped here.
    with torch.no_grad():  # inference only; skip autograd bookkeeping
        translated = model.generate(
            **batch,
            max_length=60,
            num_beams=num_beams,
            num_return_sequences=num_return_sequences,
        )
    # Decode generated token ids back to plain strings.
    return tokenizer.batch_decode(translated, skip_special_tokens=True)
def split_text_by_fullstop(text):
    """
    Split the input text into sentences based on full stops.

    Args:
        text (str): The text to split.

    Returns:
        list: Non-empty, whitespace-stripped sentence fragments. The trailing
        '.' of each sentence is consumed by the split and not included.
    """
    # Strip BEFORE filtering. The original filtered on the raw fragment, so a
    # whitespace-only fragment (e.g. from trailing "Hi. ") was truthy and
    # survived as an empty string, later becoming a bare '.' sent to the model.
    return [fragment.strip() for fragment in text.split('.') if fragment.strip()]
def process_text_by_fullstop(text, num_return_sequences=1, num_beams=3):
    """
    Paraphrase `text` one sentence at a time and rejoin the results.

    Args:
        text (str): The text to paraphrase.
        num_return_sequences (int): Number of paraphrased sequences per sentence.
        num_beams (int): Number of beams for beam search.

    Returns:
        str: All paraphrased sentences joined by single spaces.
    """
    paraphrased = []
    for raw in split_text_by_fullstop(text):
        # The splitter consumes the full stops, so restore one before handing
        # the sentence to the model.
        sentence = raw if raw.endswith('.') else raw + '.'
        paraphrased.extend(get_response(sentence, num_return_sequences, num_beams))
    return ' '.join(paraphrased)
def paraphrase(text, num_beams, num_return_sequences):
    """
    Gradio callback: paraphrase input text with the user-chosen parameters.

    Args:
        text (str): The input text to paraphrase.
        num_beams (int): Number of beams for beam search.
        num_return_sequences (int): Number of paraphrased sequences to return.

    Returns:
        str: The paraphrased text.
    """
    # The UI lists beams before return-sequences; the worker function takes
    # them in the opposite order, so swap here.
    return process_text_by_fullstop(text, num_return_sequences, num_beams)
# Gradio UI: one text box in, two sliders for the decoding knobs, one text
# box out.
_input_widgets = [
    gr.components.Textbox(
        lines=10,
        placeholder="Enter text here...",
        label="Input Text",
    ),
    gr.components.Slider(
        minimum=1,
        maximum=10,
        step=1,
        value=3,
        label="Number of Beams",
    ),
    gr.components.Slider(
        minimum=1,
        maximum=5,
        step=1,
        value=1,
        label="Number of Return Sequences",
    ),
]
_output_widget = gr.components.Textbox(
    lines=10,
    label="Paraphrased Text",
)

iface = gr.Interface(
    fn=paraphrase,
    inputs=_input_widgets,
    outputs=_output_widget,
    title="Text Paraphrasing App",
    description="Enter your text and adjust the parameters to receive paraphrased versions using the Pegasus model.",
    allow_flagging="never",
)

# Launch the web server only when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()