# psyne's picture
# Update app.py
# 204a311
import os
import fitz
import gradio as gr
import re
from summarizer import Summarizer
def preprocess(text):
    """Collapse every run of whitespace in *text* into a single space.

    Newlines, tabs and repeated spaces are all replaced by one space
    character. Leading and trailing whitespace is collapsed rather than
    stripped, so the result may still start or end with a single space.

    Args:
        text: The string to normalize.

    Returns:
        The normalized string.
    """
    # A raw string is required here: in a plain literal the backslash-s
    # sequence is an invalid escape and warns on recent Python versions.
    # The pattern already matches newlines, so no separate newline
    # replacement pass is needed.
    return re.sub(r'\s+', ' ', text)
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the text of a PDF page range as one normalized string.

    Args:
        path: Location of the PDF; passed straight to ``fitz.open``.
        start_page: 1-based number of the first page to read. Values below
            1 are treated as 1.
        end_page: 1-based number of the last page to read (inclusive).
            ``None``, or a value past the end of the document, means
            "through the last page".

    Returns:
        The text of each selected page, cleaned with ``preprocess`` and
        joined by single spaces. An empty string if the range selects no
        pages.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        # Clamp the requested range to the pages that actually exist so an
        # oversized end_page (or a start_page below 1) cannot index outside
        # the document.
        last_page = total_pages if end_page is None else min(end_page, total_pages)
        first_index = max(start_page, 1) - 1
        pages = [
            preprocess(doc.load_page(index).get_text("text"))
            for index in range(first_index, last_page)
        ]
    finally:
        # Release the document even when loading or extracting a page fails.
        doc.close()
    return ' '.join(pages)
def generate_summary(text, model='bert-base-uncased', ratio=0.2):
    """Summarize *text* with the ``Summarizer`` class imported by this file.

    Args:
        text: The text to summarize.
        model: Name of the pretrained model handed to ``Summarizer``.
            Previously this argument was overwritten before use, so the
            summarizer always ran with its own built-in default.
        ratio: Forwarded unchanged as the summarizer's ``ratio`` argument
            (presumably the fraction of the input to keep - confirm against
            the installed summarizer version).

    Returns:
        Whatever the summarizer call returns for *text*.
    """
    # NOTE(review): assumes Summarizer accepts a ``model`` keyword naming
    # the checkpoint, as in bert-extractive-summarizer - confirm against the
    # installed version.
    summarizer = Summarizer(model=model)
    return summarizer(text, ratio=ratio)
def pdf_summary(file, secret):
    """Check the shared secret, then summarize the uploaded PDF.

    Args:
        file: The uploaded file object (its ``name`` attribute is used as
            the path on disk), or ``None`` when nothing was uploaded.
        secret: The secret supplied by the user; it must equal the value of
            the ``Secret`` environment variable.

    Returns:
        The generated summary, or a message starting with ``[Error]`` /
        ``[ERROR]`` when the secret is wrong, no file was uploaded, or the
        PDF yields no text.
    """
    import hmac  # local import: only this handler needs it

    expected = os.environ.get('Secret')
    # Fail closed: an unset environment variable or a non-string secret must
    # never authenticate (previously None compared equal to None and passed).
    # compare_digest keeps the comparison constant-time; 'surrogatepass'
    # lets environment values with undecodable bytes be encoded safely.
    authorized = (
        expected is not None
        and isinstance(secret, str)
        and hmac.compare_digest(
            secret.encode('utf-8', 'surrogatepass'),
            expected.encode('utf-8', 'surrogatepass'),
        )
    )
    if not authorized:
        return '[Error]: Please provide the correct secret'
    if file is None:
        return '[ERROR]: Please upload a PDF file.'

    uploaded_path = file.name
    # Remove the 8 characters that sit just before the 4-character
    # extension and move the file to that shorter path. Presumably this
    # strips a random suffix added to the temporary upload name - TODO
    # confirm against the Gradio version in use.
    pdf_path = uploaded_path[:-12] + uploaded_path[-4:]
    os.rename(uploaded_path, pdf_path)

    text = pdf_to_text(pdf_path)
    if not text.strip():
        return '[ERROR]: The content of PDF is empty.'
    return generate_summary(text)
# Heading and short blurb passed to the interface as its title/description.
title = 'PDF Summarizer'
description = "A platform for generating summary for a PDF using BERT model"

# Wire pdf_summary to a two-input form (a PDF upload restricted to .pdf files
# and a "Secret" text box) with a single "Summary" text output.
# NOTE: this runs at module level, so the app is launched as soon as the
# file is executed or imported.
with gr.Interface(
    fn=pdf_summary,
    inputs=[
        gr.File(label='PDF', file_types=['.pdf']),
        gr.Textbox(label='Secret')
    ],
    outputs=gr.Textbox(label='Summary'),
    title=title,
    description=description
) as iface:
    iface.launch()