"""Gradio app that extracts the text of an uploaded PDF and summarizes it.

Text extraction uses PyMuPDF (``fitz``); summarization uses the extractive
BERT summarizer from the ``summarizer`` package. Access is gated by a shared
secret read from the ``Secret`` environment variable.
"""
import functools
import hmac
import os
import re

import fitz
import gradio as gr
from summarizer import Summarizer


def preprocess(text):
    """Return *text* with every run of whitespace collapsed to one space.

    Newlines count as whitespace, so line breaks inside a page are turned
    into single spaces as well.
    """
    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)


def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the plain text of a page range from the PDF at *path*.

    Args:
        path: Filesystem path of the PDF to open.
        start_page: First page to read, 1-based. Values below 1 are treated
            as 1 so they cannot turn into negative page indexes.
        end_page: Last page to read, 1-based and inclusive. ``None`` or a
            value past the end of the document means "up to the last page".

    Returns:
        The preprocessed text of the selected pages joined by single spaces.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        if end_page is None or end_page > total_pages:
            end_page = total_pages
        first_index = max(start_page, 1) - 1
        pages = [
            preprocess(doc.load_page(index).get_text("text"))
            for index in range(first_index, end_page)
        ]
    finally:
        # Release the file handle even when a page fails to load.
        doc.close()
    return ' '.join(pages)


@functools.lru_cache(maxsize=2)
def _load_summarizer(model_name):
    """Build the summarizer for *model_name* once and reuse it afterwards."""
    return Summarizer(model=model_name)


def generate_summary(text, model='bert-base-uncased', ratio=0.2):
    """Return an extractive summary of *text*.

    Args:
        text: The text to summarize.
        model: Name of the pretrained model handed to ``Summarizer``.
        ratio: Value passed through as the summarizer's ``ratio`` argument.

    Returns:
        Whatever the summarizer returns for *text*.
    """
    # The requested model is honored (it used to be overwritten and ignored)
    # and cached so it is not rebuilt on every request.
    summarizer = _load_summarizer(model)
    return summarizer(text, ratio=ratio)


def _secret_matches(candidate):
    """Return True only if *candidate* equals the configured ``Secret``."""
    expected = os.environ.get('Secret')
    if expected is None or candidate is None:
        # Fail closed: an unset secret must never grant access.
        return False
    # Constant-time comparison; bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    return hmac.compare_digest(
        str(candidate).encode('utf-8'), expected.encode('utf-8')
    )


def pdf_summary(file, secret):
    """Gradio handler: validate the request and summarize the uploaded PDF.

    Args:
        file: The upload from the ``gr.File`` input, either an object with a
            ``name`` attribute holding the path or the path string itself,
            or ``None`` when nothing was uploaded.
        secret: The text entered in the ``Secret`` textbox.

    Returns:
        The summary text, or an ``[ERROR]``-style message describing why no
        summary was produced.
    """
    if not _secret_matches(secret):
        return '[Error]: Please provide the correct secret'
    if file is None:
        return '[ERROR]: Please upload a PDF file.'

    # Read the upload in place. The earlier rename assumed an 8-character
    # temp suffix in the file name and could corrupt other paths or collide
    # between concurrent uploads.
    pdf_path = file if isinstance(file, str) else file.name
    text = pdf_to_text(pdf_path)
    if not text.strip():
        return '[ERROR]: The content of PDF is empty.'
    return generate_summary(text)


title = 'PDF Summarizer'
description = "A platform for generating summary for a PDF using BERT model"

iface = gr.Interface(
    fn=pdf_summary,
    inputs=[
        gr.File(label='PDF', file_types=['.pdf']),
        gr.Textbox(label='Secret'),
    ],
    outputs=gr.Textbox(label='Summary'),
    title=title,
    description=description,
)
iface.launch()