cdgranadillo committed on
Commit
04de01b
·
1 Parent(s): c23aaf6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+
5
+ # usage tracking:
6
+ from sheet2api import Sheet2APIClient
7
+ from datetime import datetime
8
# Sheet2API client used to append one row per summarization request.
client = Sheet2APIClient(api_url='https://sheet2api.com/v1/hwp4AVQlOawy/summarizer')

# Checkpoint used for summarization.
_MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"

# Loaded (tokenizer, model) pairs keyed by checkpoint name, so the weights are
# loaded once per process instead of once per request.
_LOADED_MODELS = {}


def _normalize_whitespace(text):
    """Trim *text* and collapse every run of whitespace into a single space."""
    return re.sub(r"\s+", " ", text.strip())


def _load_summarizer(model_name):
    """Return the ``(tokenizer, model)`` pair for *model_name*, loading it on first use."""
    if model_name not in _LOADED_MODELS:
        # The slow (non-fast) tokenizer is requested explicitly.
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        _LOADED_MODELS[model_name] = (tokenizer, model)
    return _LOADED_MODELS[model_name]


def generate_summary(text):
    """Summarize *text* with the mT5 multilingual XLSum checkpoint.

    Every call first records a timestamp row through the Sheet2API client
    (usage tracking). The input is whitespace-normalized, tokenized with
    truncation/padding to 512 tokens, and decoded from a 4-beam search
    limited to 84 tokens with no repeated bigrams.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary string, or ``""`` when *text* is empty,
        whitespace-only, or ``None``.
    """
    # usage tracking:
    client.create_row(row={'Timestamp': str(datetime.now())})

    # Nothing to summarize: skip tokenization and generation entirely.
    if not text or not text.strip():
        return ""

    tokenizer, model = _load_summarizer(_MODEL_NAME)

    input_ids = tokenizer(
        [_normalize_whitespace(text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    # A single input was passed, so only the first generated sequence is used.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=84,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
45
+
46
+
47
# Build the web UI: a ten-line input box feeding generate_summary, and a
# four-line box that shows the returned summary.
_source_box = gr.Textbox(lines=10, placeholder="Insert the text here")
_summary_box = gr.Textbox(lines=4)
demo = gr.Interface(
    fn=generate_summary,
    inputs=_source_box,
    outputs=_summary_box,
)

# Launch the interface.
demo.launch()