vijay399 committed on
Commit
f2a57a2
·
1 Parent(s): 22dd122

Update src/paraphrase/Paraphrase.py

Browse files
Files changed (1) hide show
  1. src/paraphrase/Paraphrase.py +7 -15
src/paraphrase/Paraphrase.py CHANGED
@@ -1,15 +1,11 @@
1
- from nltk.tokenize import sent_tokenize
2
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  import torch
4
  import src.exception.Exception.Exception as ExceptionCustom
5
 
6
  METHOD = "PARAPHRASE"
7
 
8
- tokenizer = AutoTokenizer.from_pretrained("t5-base")
9
- model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
10
-
11
- # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
- # model.to(device)
13
 
14
  def paraphraseParaphraseMethod(requestValue : str):
15
  exception = ""
@@ -24,20 +20,16 @@ def paraphraseParaphraseMethod(requestValue : str):
24
  for SENTENCE in tokenized_sent_list:
25
  text = "paraphrase: " + SENTENCE
26
 
27
- encoding = tokenizer.encode_plus(text, pad_to_max_length=True, return_tensors="pt")
28
  input_ids, attention_masks = encoding["input_ids"], encoding["attention_mask"]
29
 
30
  beam_outputs = model.generate(
31
  input_ids=input_ids,
32
  attention_mask=attention_masks,
33
- do_sample=True,
34
  max_length=512,
35
- top_k=120,
36
- top_p=0.90,
37
- early_stopping=False,
38
- num_return_sequences=1,
39
- no_repeat_ngram_size=2,
40
- num_beams=1
41
  )
42
 
43
  for beam_output in beam_outputs:
 
1
+ from transformers import PegasusForConditionalGeneration, PegasusTokenizer
 
2
  import torch
3
  import src.exception.Exception.Exception as ExceptionCustom
4
 
5
  METHOD = "PARAPHRASE"
6
 
7
+ tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-xsum')
8
+ model = PegasusForConditionalGeneration.from_pretrained('google/pegasus-xsum')
 
 
 
9
 
10
  def paraphraseParaphraseMethod(requestValue : str):
11
  exception = ""
 
20
  for SENTENCE in tokenized_sent_list:
21
  text = "paraphrase: " + SENTENCE
22
 
23
+ encoding = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
24
  input_ids, attention_masks = encoding["input_ids"], encoding["attention_mask"]
25
 
26
  beam_outputs = model.generate(
27
  input_ids=input_ids,
28
  attention_mask=attention_masks,
 
29
  max_length=512,
30
+ num_beams=5,
31
+ length_penalty=0.8,
32
+ early_stopping=True
 
 
 
33
  )
34
 
35
  for beam_output in beam_outputs: