cdgranadillo committed on
Commit
b58669a
·
1 Parent(s): 3e39b83

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ from sheet2api import Sheet2APIClient
5
+ from datetime import datetime
6
+ from custom_functions import preprocessing_text
7
+
8
# Shared sheet2api client; generate_summary() uses it to append one
# timestamp row per request.
# NOTE(review): the endpoint URL is hard-coded here - confirm it is meant
# to be public before publishing this file.
client = Sheet2APIClient(
    api_url='https://sheet2api.com/v1/hwp4AVQlOawy/summarizer',
)
9
+
10
_SUMMARIZER_MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"

# Process-wide cache holding the (tokenizer, model) pair under the key "pair".
_SUMMARIZER_CACHE = {}


def _collapse_whitespace(text):
    """Strip *text* and squeeze every whitespace run to a single space."""
    # ``\s`` already matches newlines, so one raw-string pattern covers both
    # the newline and the generic-whitespace substitutions.
    return re.sub(r'\s+', ' ', text.strip())


def _load_summarizer():
    """Return the ``(tokenizer, model)`` pair, creating it on first use only."""
    if "pair" not in _SUMMARIZER_CACHE:
        # Build both objects before caching so a failed load never leaves a
        # half-populated cache behind.
        tokenizer = AutoTokenizer.from_pretrained(
            _SUMMARIZER_MODEL_NAME,
            use_fast=False,  # use_fast was set to false
        )
        model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZER_MODEL_NAME)
        _SUMMARIZER_CACHE["pair"] = (tokenizer, model)
    return _SUMMARIZER_CACHE["pair"]


def generate_summary(text):
    """Summarize *text* with the mT5 multilingual XLSum model.

    A row holding the current timestamp is first written through the
    module-level ``client``; any exception raised by that call propagates
    to the caller. The text is then passed through ``preprocessing_text``,
    whitespace-normalized, tokenized (padded and truncated to 512 tokens)
    and summarized with 4-beam generation capped at a length of 84.

    Args:
        text: The raw input text to summarize.

    Returns:
        The decoded summary string, with special tokens removed.
    """
    client.create_row(row={'Timestamp': str(datetime.now())})

    # preprocessing_text comes from custom_functions; presumably it returns a
    # cleaned string - verify against that module.
    clean_text = preprocessing_text(text)

    # Loading the tokenizer and model is expensive, so they are created once
    # and reused across requests instead of being rebuilt on every call.
    tokenizer, model = _load_summarizer()

    input_ids = tokenizer(
        [_collapse_whitespace(clean_text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    # generate() returns one sequence per batch item; only one text was sent.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    summary = tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    return summary
44
+
45
+
46
# Gradio UI: a ten-line text box for the input text and a four-line text
# box for the generated summary, both wired to generate_summary().
summary_input = gr.Textbox(lines=10, placeholder="Insert the text here")
summary_output = gr.Textbox(lines=4)

demo = gr.Interface(
    fn=generate_summary,
    inputs=summary_input,
    outputs=summary_output,
)

# Start the web app as soon as the script is executed.
demo.launch()