psyne committed on
Commit
3a5bd74
·
1 Parent(s): a499cfd

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import fitz
3
+ import gradio as gr
4
+ import re
5
+ from summarizer import Summarizer
6
+
7
def preprocess(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated blanks are all replaced, so the result is a
    single line. Leading and trailing whitespace is reduced to one space
    each rather than stripped.

    Args:
        text: Raw text, e.g. the text extracted from one PDF page.

    Returns:
        The text with each whitespace run replaced by one space.
    """
    # Raw string: '\s' inside a normal literal is an invalid escape sequence.
    # `\s+` already matches newlines, so a separate '\n' replacement pass is
    # not needed.
    return re.sub(r'\s+', ' ', text)
11
+
12
+
13
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the text of a PDF page range as one normalised string.

    Args:
        path: Path of the PDF file; opened with ``fitz.open``.
        start_page: 1-based number of the first page to read.
        end_page: 1-based number of the last page to read (inclusive).
            ``None`` means the last page of the document.

    Returns:
        The preprocessed text of every page in the range, joined by single
        spaces. An empty string when the range contains no pages.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count

        # Clamp the requested range to pages that exist: a start below 1
        # would otherwise yield negative page indices, and an end beyond
        # the page count would index pages that are not in the document.
        first_index = max(start_page, 1) - 1
        stop_index = total_pages if end_page is None else min(end_page, total_pages)

        return ' '.join(
            preprocess(doc.load_page(index).get_text("text"))
            for index in range(first_index, stop_index)
        )
    finally:
        # Release the document even if extraction of some page fails.
        doc.close()
29
+
30
+
31
def generate_summary(text, model='bert-base-uncased', ratio=0.2):
    """Summarise *text* with a ``Summarizer`` built for the given model.

    Args:
        text: The text to summarise.
        model: Name of the pretrained model handed to ``Summarizer``.
        ratio: Forwarded unchanged as the summarizer call's ``ratio``
            argument (presumably the fraction of the input to keep --
            confirm against the summarizer documentation).

    Returns:
        Whatever the summarizer call returns for *text*.
    """
    # NOTE(review): the model name is passed positionally on purpose.
    # bert-extractive-summarizer appears to name this parameter ``model``
    # rather than ``model_name``; positional passing avoids depending on the
    # keyword. Confirm against the installed version.
    # A separate local name also keeps the ``model`` argument from being
    # overwritten by the summarizer instance.
    summarizer = Summarizer(model)
    return summarizer(text, ratio=ratio)
35
+
36
+
37
def pdf_summary(file, secret):
    """Gradio handler: check the shared secret, then summarise the PDF.

    Args:
        file: The uploaded file as delivered by the ``gr.File`` input.
            Either an object exposing the path as ``.name`` or a plain
            path string; ``None`` when nothing was uploaded.
        secret: The value typed into the "Secret" textbox.

    Returns:
        The generated summary, or a bracketed error message when the secret
        is wrong, no file was uploaded, or the PDF contains no text.
    """
    # Function-scope import so this block does not depend on edits to the
    # module's import section.
    import hmac

    expected = os.environ.get('Secret')
    # Fail closed: with the 'Secret' variable unset, a plain `!=` check would
    # accept a missing (None) secret. Comparing encoded bytes with
    # compare_digest keeps the check constant-time and also works for
    # non-ASCII input.
    authorised = (
        expected is not None
        and isinstance(secret, str)
        and hmac.compare_digest(secret.encode('utf-8'), expected.encode('utf-8'))
    )
    if not authorised:
        return '[Error]: Please provide the correct secret'

    if file is None:
        return '[ERROR]: Please upload a PDF file.'

    # Read the upload where it already is. The previous rename cut a fixed
    # 8 characters out of the file name, which mangles any name that lacks
    # such a suffix and can make two uploads collide on one path.
    path = getattr(file, 'name', file)
    text = pdf_to_text(path)

    if not text.strip():
        return '[ERROR]: The content of PDF is empty.'

    return generate_summary(text)
55
+
56
+
57
# Static text handed to the Gradio interface as its title and description.
title = 'PDF Summarizer'
description = "A platform for generating summary for a PDF using BERT model"

# Input widgets, in the order pdf_summary receives them: (file, secret).
_pdf_input = gr.File(label='PDF', file_types=['.pdf'])
_secret_input = gr.Textbox(label='Secret')
# Output widget showing either the summary or an error message.
_summary_output = gr.Textbox(label='Summary')

# Build the interface and start serving it as soon as the module runs.
with gr.Interface(
    fn=pdf_summary,
    inputs=[_pdf_input, _secret_input],
    outputs=_summary_output,
    title=title,
    description=description,
) as iface:
    iface.launch()