from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Initialize the tokenizer and model for the T5-based paraphraser
def init_model():
    para_tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
    para_model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
    return para_tokenizer, para_model

# Paraphrase the input text with diverse beam search (grouped beams plus a
# diversity penalty) so the returned candidates differ from one another
def paraphrase(question, para_tokenizer, para_model, num_beams=10, num_beam_groups=10,
               num_return_sequences=10, repetition_penalty=10.0, diversity_penalty=3.0,
               no_repeat_ngram_size=2, temperature=0.7, max_length=64):
    # The model is prompted with a "paraphrase: " task prefix before the input
    input_ids = para_tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids
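    # Diverse beam search decoding; note that temperature is only used when
    # sampling is enabled (do_sample=True), so it does not affect the beams here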
    outputs = para_model.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )
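    # Decode every returned beam into a plain string, dropping special tokens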
    res = para_tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return res

# Convenience wrapper: load the model and paraphrase a single question
def generate_paraphrase(question):
    para_tokenizer, para_model = init_model()
    res = paraphrase(question, para_tokenizer, para_model)
    return res

print(generate_paraphrase("A special committee of Paramount’s board is said to have approved a tentative deal to merge with Skydance."))
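
# Note: generate_paraphrase() reloads the tokenizer and model on every call.
# A minimal sketch (with hypothetical example sentences) that loads them once
# and reuses them across several inputs:
para_tokenizer, para_model = init_model()
for sentence in [
    "First example sentence to rephrase.",
    "Second example sentence to rephrase.",
]:
    print(paraphrase(sentence, para_tokenizer, para_model))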