File size: 1,653 Bytes
3a5bd74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204a311
3a5bd74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import fitz
import gradio as gr
import re
from summarizer import Summarizer

def preprocess(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are all treated as whitespace.
    Leading and trailing whitespace is reduced to one space, not stripped.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence
    # and triggers a warning. `\s+` already matches newlines, so a separate
    # '\n' replacement pass is not needed.
    return re.sub(r'\s+', ' ', text)


def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the plain text of a page range from a PDF.

    Args:
        path: Path of the PDF file, passed to ``fitz.open``.
        start_page: 1-based number of the first page to read. Values below
            1 are treated as 1.
        end_page: 1-based number of the last page to read (inclusive).
            ``None``, or a value past the end of the document, means the
            last page.

    Returns:
        The text of the selected pages, each normalized with ``preprocess``
        and joined by single spaces. An empty string if the range selects
        no pages.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count

        # Clamp the requested range to the document so that a start below 1
        # does not become a negative page index and an end past the last
        # page does not fail part-way through.
        first_index = max(start_page, 1) - 1
        stop_index = total_pages if end_page is None else min(end_page, total_pages)

        page_texts = [
            preprocess(doc.load_page(i).get_text("text"))
            for i in range(first_index, stop_index)
        ]
    finally:
        # Release the file handle even if a page fails to load or parse.
        doc.close()

    return ' '.join(page_texts)


def generate_summary(text, model='bert-base-uncased', ratio=0.2):
    """Produce a summary of *text* with ``Summarizer``.

    Args:
        text: The text to summarize.
        model: Name of the pretrained model handed to ``Summarizer``.
        ratio: Value forwarded as the summarizer's ``ratio`` argument.

    Returns:
        Whatever the summarizer call returns for *text*.
    """
    # Forward the requested model name; previously the parameter was
    # overwritten by the Summarizer instance and therefore never used.
    summarizer = Summarizer(model=model)
    return summarizer(text, ratio=ratio)


def pdf_summary(file, secret):
    """Validate the request and return a summary of the uploaded PDF.

    Args:
        file: The uploaded file object (its ``name`` attribute is used as
            the path of the PDF), or ``None`` if nothing was uploaded.
        secret: The secret entered by the user; it must equal the value of
            the ``Secret`` environment variable.

    Returns:
        The summary text, or a bracketed error message when the secret is
        wrong, no file was uploaded, or the PDF contains no text.
    """
    import hmac  # local import: only needed for the constant-time comparison

    expected = os.environ.get('Secret')
    # Fail closed: an unset environment variable or a non-string secret must
    # never authenticate (a plain `!=` lets None match None). The comparison
    # is constant-time so response timing does not leak the secret.
    # 'surrogatepass' keeps encoding from raising on unusual input.
    authorized = (
        expected is not None
        and isinstance(secret, str)
        and hmac.compare_digest(
            secret.encode('utf-8', 'surrogatepass'),
            expected.encode('utf-8', 'surrogatepass'),
        )
    )
    if not authorized:
        return '[Error]: Please provide the correct secret'

    if file is None:
        return '[ERROR]: Please upload a PDF file.'

    # Read the upload in place. Renaming it to a path derived by slicing
    # characters off the name is not needed to extract text, and could
    # mangle the path or collide with another upload.
    text = pdf_to_text(file.name)

    if text.strip() == '':
        return '[ERROR]: The content of PDF is empty.'

    return generate_summary(text)


# Heading and blurb passed to the interface below.
title = 'PDF Summarizer'
description = "A platform for generating summary for a PDF using BERT model"

# Keyword arguments for the interface: a PDF upload and a secret as inputs,
# a single text box for the resulting summary as output.
interface_options = dict(
    fn=pdf_summary,
    inputs=[
        gr.File(label='PDF', file_types=['.pdf']),
        gr.Textbox(label='Secret'),
    ],
    outputs=gr.Textbox(label='Summary'),
    title=title,
    description=description,
)

# Build the interface and start serving it.
with gr.Interface(**interface_options) as iface:
    iface.launch()