datasciencedojo committed on
Commit
8f95c55
1 Parent(s): 23f7624

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -9
app.py CHANGED
@@ -8,16 +8,35 @@ torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
8
  tokenizer = PegasusTokenizer.from_pretrained(model_name)
9
  model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
10
 
11
- def paraphrase_text(input_text):
12
- num_return_sequences = 3
13
- batch = tokenizer.prepare_seq2seq_batch([input_text], truncation=True, padding='longest', max_length=60,
14
  return_tensors="pt").to(torch_device)
15
- translated = model.generate(**batch, max_length=60, num_beams=10, num_return_sequences=num_return_sequences,
16
  temperature=1.5)
17
- paraphrased_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
18
- return paraphrased_text[0], paraphrased_text[1], paraphrased_text[2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- examples = [["Uploading a video to YouTube can help exposure for your business."], ["Niagara Falls is viewed by thousands of tourists every year."]]
21
 
22
  css = """
23
  footer {display:none !important}
@@ -69,6 +88,6 @@ footer {display:none !important}
69
  }
70
  """
71
 
72
- demo = gr.Interface(fn=paraphrase_text, inputs=gr.Textbox(lines=3, placeholder="Enter sample text here", label="Original text"), outputs=[gr.Textbox(label="Paraphrasing 1"), gr.Textbox(label="Paraphrasing 2"), gr.Textbox(label="Paraphrasing 3")], title="Paraphrasing | Datascience Dojo", examples=examples, css=css)
73
 
74
- demo.launch( debug = True )
 
8
  tokenizer = PegasusTokenizer.from_pretrained(model_name)
9
  model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
10
 
11
+
12
+ def get_response(input_text, num_return_sequences):
13
+ batch = tokenizer.prepare_seq2seq_batch([input_text], truncation=True, padding='longest', max_length=10000,
14
  return_tensors="pt").to(torch_device)
15
+ translated = model.generate(**batch, num_beams=10, num_return_sequences=num_return_sequences,
16
  temperature=1.5)
17
+ tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
18
+ return tgt_text
19
+
20
+
21
+ def get_response_from_text(
22
+ context="I am a student at the University of Washington. I am taking a course called Data Science."):
23
+ splitter = SentenceSplitter(language='en')
24
+ sentence_list = splitter.split(context)
25
+
26
+ paraphrase = []
27
+
28
+ for i in sentence_list:
29
+ a = get_response(i, 1)
30
+ paraphrase.append(a)
31
+ paraphrase2 = [' '.join(x) for x in paraphrase]
32
+ paraphrase3 = [' '.join(x for x in paraphrase2)]
33
+ paraphrased_text = str(paraphrase3).strip('[]').strip("'")
34
+ return paraphrased_text
35
+
36
+ def greet(context):
37
+ return get_response_from_text(context)
38
 
39
+ examples = [["Begin your professional career by learning data science skills with Data science Dojo, a globally recognized e-learning platform where we teach students how to learn data science, data analytics, machine learning and more."], ["Natural language processing (NLP) is a subfield of linguistics, computer science, and artificial intelligence concerned with the interactions between computers and human language, in particular how to program computers to process and analyze large amounts of natural language data."]]
40
 
41
  css = """
42
  footer {display:none !important}
 
88
  }
89
  """
90
 
91
+ demo = gr.Interface(fn=greet, inputs=gr.Textbox(lines=3, placeholder="Enter sample text here", label="Original text"), outputs=gr.Textbox(label="Paraphrasing"), title="Paraphrasing | Datascience Dojo", examples=examples, css=css)
92
 
93
+ demo.launch()