# Hugging Face Space: Pegasus paraphrasing demo.
# (Non-code page chrome from the scraped Space listing removed.)
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import gradio as gr
# Load the tokenizer and model once at import time so every request reuses them
model_name = 'tuner007/pegasus_paraphrase'  # PEGASUS checkpoint fine-tuned for paraphrasing
# Prefer GPU when available; otherwise fall back to CPU
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Initialize tokenizer and model (downloads weights on first run) and move model to the device
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
def get_response(input_text, num_return_sequences=1, num_beams=3):
    """
    Generate paraphrased text for a given input_text using the Pegasus model.

    Args:
        input_text (str): The text to paraphrase.
        num_return_sequences (int): Number of paraphrased sequences to return.
            Must be <= num_beams for beam search to succeed.
        num_beams (int): Number of beams for beam search.

    Returns:
        list: A list of num_return_sequences paraphrased text strings.
    """
    # Tokenize the input text; truncate to the 60-token window the model
    # is prompted with (matches max_length used at generation time).
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt"
    ).to(torch_device)
    # Inference only: disable autograd to avoid tracking gradients.
    # NOTE: the original passed temperature=0.7 here, but with beam search
    # (do_sample defaults to False) temperature is ignored by generate(),
    # so it has been dropped — output is unchanged and the transformers
    # "temperature is set but do_sample=False" warning is avoided.
    with torch.no_grad():
        translated = model.generate(
            **batch,
            max_length=60,
            num_beams=num_beams,
            num_return_sequences=num_return_sequences,
        )
    # Decode the generated token ids back into plain strings
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text
def split_text_by_fullstop(text):
    """
    Split the input text into sentences based on full stops.

    Args:
        text (str): The text to split.

    Returns:
        list: A list of non-empty, whitespace-stripped sentence fragments
            (without their trailing periods).
    """
    # Filter AFTER stripping: the original filtered on the raw fragment,
    # so whitespace-only pieces (e.g. from "a. . b") slipped through as
    # empty strings in the result.
    sentences = [stripped for fragment in text.split('.')
                 if (stripped := fragment.strip())]
    return sentences
def process_text_by_fullstop(text, num_return_sequences=1, num_beams=3):
    """
    Paraphrase text sentence-by-sentence.

    The text is split on full stops, each fragment is paraphrased
    independently, and the results are joined back into one string.

    Args:
        text (str): The text to paraphrase.
        num_return_sequences (int): Number of paraphrased sequences per sentence.
        num_beams (int): Number of beams for beam search.

    Returns:
        str: All paraphrased sentences joined with single spaces.
    """
    paraphrased = []
    for fragment in split_text_by_fullstop(text):
        # Restore the period removed by the splitter so the model
        # receives a complete sentence.
        if not fragment.endswith('.'):
            fragment = fragment + '.'
        paraphrased.extend(get_response(fragment, num_return_sequences, num_beams))
    return ' '.join(paraphrased)
def paraphrase(text, num_beams, num_return_sequences):
    """
    Gradio entry point: paraphrase input text with user-chosen parameters.

    Args:
        text (str): The input text to paraphrase.
        num_beams (int): Number of beams for beam search.
        num_return_sequences (int): Number of paraphrased sequences to return.

    Returns:
        str: The paraphrased text.
    """
    # Keyword arguments make the deliberate parameter-order swap explicit:
    # this function takes num_beams first (matching the UI slider order),
    # while process_text_by_fullstop takes num_return_sequences first.
    return process_text_by_fullstop(
        text,
        num_return_sequences=num_return_sequences,
        num_beams=num_beams,
    )
# Define the Gradio interface: one text box and two sliders feeding
# paraphrase(text, num_beams, num_return_sequences) in that order.
iface = gr.Interface(
    fn=paraphrase,
    inputs=[
        # Free-form input text to paraphrase
        gr.components.Textbox(
            lines=10,
            placeholder="Enter text here...",
            label="Input Text"
        ),
        # Beam-search width; larger values explore more candidates
        gr.components.Slider(
            minimum=1,
            maximum=10,
            step=1,
            value=3,
            label="Number of Beams"
        ),
        # How many paraphrases to produce per sentence
        # (should not exceed the number of beams)
        gr.components.Slider(
            minimum=1,
            maximum=5,
            step=1,
            value=1,
            label="Number of Return Sequences"
        )
    ],
    outputs=gr.components.Textbox(
        lines=10,
        label="Paraphrased Text"
    ),
    title="Text Paraphrasing App",
    description="Enter your text and adjust the parameters to receive paraphrased versions using the Pegasus model.",
    allow_flagging="never"
)
# Launch the app only when run as a script (not on import)
if __name__ == "__main__":
    iface.launch()