import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import gradio as gr

# Load the tokenizer and model once when the app starts
model_name = 'tuner007/pegasus_paraphrase'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize tokenizer and model
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)

def get_response(input_text, num_return_sequences=1, num_beams=3):
    """
    Generate paraphrased text for a given input_text using the Pegasus model.
    
    Args:
        input_text (str): The text to paraphrase.
        num_return_sequences (int): Number of paraphrased sequences to return.
        num_beams (int): Number of beams for beam search.
        
    Returns:
        list: A list containing paraphrased text strings.
    """
    # Tokenize the input text
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt"
    ).to(torch_device)
    
    # Beam search requires at least as many beams as returned sequences
    num_beams = max(num_beams, num_return_sequences)

    # Generate paraphrased outputs with beam search
    translated = model.generate(
        **batch,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences
    )
    
    # Decode the generated tokens
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text
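
# Illustrative usage (not called by the app itself); exact outputs depend on the model:
#   get_response("The weather is really nice today.", num_return_sequences=2, num_beams=5)
#   -> e.g. ["The weather is great today.", "Today the weather is very nice."]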

def split_text_by_fullstop(text):
    """
    Split the input text into sentences based on full stops.
    
    Args:
        text (str): The text to split.
        
    Returns:
        list: A list of sentences.
    """
    sentences = [sentence.strip() for sentence in text.split('.') if sentence.strip()]
    return sentences
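
# Illustrative example of the naive full-stop split used above:
#   split_text_by_fullstop("Hello there. How are you?") returns ["Hello there", "How are you?"]
#   (note that abbreviations such as "e.g." are also treated as sentence boundaries)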

def process_text_by_fullstop(text, num_return_sequences=1, num_beams=3):
    """
    Process the input text by splitting it into sentences and paraphrasing each sentence.
    
    Args:
        text (str): The text to paraphrase.
        num_return_sequences (int): Number of paraphrased sequences per sentence.
        num_beams (int): Number of beams for beam search.
        
    Returns:
        str: The paraphrased text.
    """
    sentences = split_text_by_fullstop(text)
    paraphrased_sentences = []
    
    for sentence in sentences:
        # Ensure each sentence ends with a period
        sentence = sentence + '.' if not sentence.endswith('.') else sentence
        paraphrases = get_response(sentence, num_return_sequences, num_beams)
        paraphrased_sentences.extend(paraphrases)
    
    # Join all paraphrased sentences into a single string
    return ' '.join(paraphrased_sentences)

def paraphrase(text, num_beams, num_return_sequences):
    """
    Interface function to paraphrase input text based on user parameters.
    
    Args:
        text (str): The input text to paraphrase.
        num_beams (int): Number of beams for beam search.
        num_return_sequences (int): Number of paraphrased sequences to return.
        
    Returns:
        str: The paraphrased text.
    """
    return process_text_by_fullstop(text, num_return_sequences, num_beams)

# Define the Gradio interface
iface = gr.Interface(
    fn=paraphrase,
    inputs=[
        gr.components.Textbox(
            lines=10, 
            placeholder="Enter text here...", 
            label="Input Text"
        ),
        gr.components.Slider(
            minimum=1, 
            maximum=10, 
            step=1, 
            value=3, 
            label="Number of Beams"
        ),
        gr.components.Slider(
            minimum=1, 
            maximum=5, 
            step=1, 
            value=1, 
            label="Number of Return Sequences"
        )
    ],
    outputs=gr.components.Textbox(
        lines=10, 
        label="Paraphrased Text"
    ),
    title="Text Paraphrasing App",
    description="Enter your text and adjust the parameters to receive paraphrased versions using the Pegasus model.",
    allow_flagging="never"
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
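
# To try the app locally (assuming torch, transformers, sentencepiece, and gradio are
# installed, and this file is saved as, say, app.py):
#   python app.py
# Gradio prints a local URL (typically http://127.0.0.1:7860) to open in a browser.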