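# app.py — Gradio demo that paraphrases input text sentence by sentence with the
# pushkarraj/opt_paraphraser model served through a transformers text-generation pipeline.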
import gradio as gr
import pandas as pd
import os
import time
import torch
from transformers import pipeline, GPT2Tokenizer, OPTForCausalLM
from spacy.lang.en import English

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

torch.cuda.empty_cache()

# Load the fine-tuned OPT paraphraser together with the base facebook/opt-1.3b tokenizer.
model = OPTForCausalLM.from_pretrained("pushkarraj/opt_paraphraser")
tokenizer = GPT2Tokenizer.from_pretrained("facebook/opt-1.3b", truncation=True)

generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)

def cleaned_para(input_sentence):
  # The model continues the prompt "<s>sentence</s>>>>><p>" with a paraphrase,
  # so keep only the text between the "<p>" marker and the closing "</p>".
  p = generator("<s>" + input_sentence + "</s>>>>><p>", do_sample=True,
                max_length=len(input_sentence.split(" ")) + 200,
                temperature=0.9, repetition_penalty=1.2)
  return p[0]["generated_text"].split("</s>>>>><p>")[1].split("</p>")[0]

def sentensizer(raw_text):
  # Split the raw text into sentences with spaCy's rule-based sentencizer.
  nlp = English()
  nlp.add_pipe("sentencizer")
  doc = nlp(raw_text)
  sentences = [sent for sent in doc.sents]
  print(sentences)
  return sentences


def paraphraser(text):
  # Paraphrase each sentence independently and report how long the pass took.
  begin = time.time()
  paraphrased = [cleaned_para(str(sent)) for sent in sentensizer(text)]
  end = time.time()
  print(end - begin)
  # Join with spaces so the paraphrased sentences do not run together.
  return " ".join(paraphrased)

interface = gr.Interface(fn=paraphraser, inputs="text", outputs=["text"],
                         title="Paraphraser", description="A paraphrasing tool")
interface.launch()
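
# A minimal local sanity check, assuming the model above has loaded
# (the example sentence is just an illustration):
#   print(paraphraser("The weather is nice today. I plan to go for a walk."))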