Spaces:

datasciencedojo
/

Paraphrasing

Running

App Files Files Community

datasciencedojo committed on Oct 19, 2022

Commit

8f95c55

1 Parent(s): 23f7624

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -9

app.py CHANGED Viewed

@@ -8,16 +8,35 @@ torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
 tokenizer = PegasusTokenizer.from_pretrained(model_name)
 model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
-def paraphrase_text(input_text):
-    num_return_sequences = 3
-    batch = tokenizer.prepare_seq2seq_batch([input_text], truncation=True, padding='longest', max_length=60,
                                             return_tensors="pt").to(torch_device)
-    translated = model.generate(**batch, max_length=60, num_beams=10, num_return_sequences=num_return_sequences,
                                 temperature=1.5)
-    paraphrased_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
-    return paraphrased_text[0], paraphrased_text[1], paraphrased_text[2]
-examples = [["Uploading a video to YouTube can help exposure for your business."], ["Niagara Falls is viewed by thousands of tourists every year."]]
 css = """
 footer {display:none !important}
@@ -69,6 +88,6 @@ footer {display:none !important}
 }
 """
-demo = gr.Interface(fn=paraphrase_text, inputs=gr.Textbox(lines=3, placeholder="Enter sample text here", label="Original text"), outputs=[gr.Textbox(label="Paraphrasing 1"), gr.Textbox(label="Paraphrasing 2"), gr.Textbox(label="Paraphrasing 3")], title="Paraphrasing | Datascience Dojo", examples=examples, css=css)
-demo.launch( debug = True )

 tokenizer = PegasusTokenizer.from_pretrained(model_name)
 model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
+def get_response(input_text, num_return_sequences):
+    """Generate paraphrase candidates for a single piece of text.
+
+    Args:
+        input_text: The text to paraphrase (the caller passes one sentence at a time).
+        num_return_sequences: How many paraphrase candidates to return.
+
+    Returns:
+        A list with the decoded paraphrase strings, special tokens removed.
+    """
+    # Never request more input tokens than the tokenizer declares for the model;
+    # 10000 stays as the ceiling for tokenizers that declare no practical limit.
+    max_input_tokens = min(10000, tokenizer.model_max_length)
+    # Calling the tokenizer directly replaces the deprecated prepare_seq2seq_batch().
+    batch = tokenizer([input_text], truncation=True, padding='longest', max_length=max_input_tokens,
+                      return_tensors="pt").to(torch_device)
+    # Beam search cannot return more sequences than it has beams.
+    num_beams = max(10, num_return_sequences)
+    # NOTE(review): temperature presumably has no effect without do_sample=True - confirm.
+    translated = model.generate(**batch, num_beams=num_beams, num_return_sequences=num_return_sequences,
+                                temperature=1.5)
+    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
+    return tgt_text
+def get_response_from_text(
+        context="I am a student at the University of Washington. I am taking a course called Data Science."):
+    """Paraphrase a passage sentence by sentence and return it as one string.
+
+    Args:
+        context: The passage to paraphrase. It is split into sentences with an
+            English SentenceSplitter and each sentence is paraphrased on its own.
+
+    Returns:
+        The paraphrased sentences joined with single spaces.
+    """
+    splitter = SentenceSplitter(language='en')
+    # One candidate is requested per sentence; get_response() still returns a list.
+    per_sentence = [get_response(sentence, 1) for sentence in splitter.split(context)]
+    # Join the strings directly. Round-tripping through str(list) and stripping
+    # brackets/quotes leaks repr artifacts (wrapping double quotes, backslash
+    # escapes) whenever a paraphrase contains an apostrophe or a quote, and it
+    # also eats apostrophes that legitimately start or end the text.
+    paraphrased_text = ' '.join(' '.join(candidates) for candidates in per_sentence)
+    return paraphrased_text
+def greet(context):
+    """Return the paraphrase of ``context`` produced by get_response_from_text."""
+    paraphrased = get_response_from_text(context)
+    return paraphrased
+# Sample inputs; each inner list holds the value for the single text input.
+examples = [
+    ["Begin your professional career by learning data science skills with Data science Dojo, a globally recognized e-learning platform where we teach students how to learn data science, data analytics, machine learning and more."],
+    ["Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence concerned with the interactions between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data."],
+]
 css = """
 footer {display:none !important}
 }
 """
+# Single text box in, single text box out; greet() does the paraphrasing.
+demo = gr.Interface(
+    fn=greet,
+    inputs=gr.Textbox(lines=3, placeholder="Enter sample text here", label="Original text"),
+    outputs=gr.Textbox(label="Paraphrasing"),
+    title="Paraphrasing | Datascience Dojo",
+    examples=examples,
+    css=css,
+)
+demo.launch()