|
|
|
|
|
import re

import gradio as gr
import PyPDF2
import torch
from transformers import pipeline
|
|
|
|
|
|
|
|
|
|
|
# ASCII-only case-insensitive match for the heading. Searching the original
# text (rather than a lowercased copy) keeps match offsets valid, because
# str.lower() can change the length of a string for some Unicode characters.
_ABSTRACT_HEADING = re.compile(r"abstract:", re.IGNORECASE | re.ASCII)
_NOT_FOUND_MESSAGE = "Abstract not found."


def _extract_abstract(text: str) -> str:
    """Return the abstract section of *text*, heading included.

    The section starts at the first case-insensitive occurrence of
    ``"abstract:"`` and runs up to (not including) the next blank line
    (``"\\n\\n"``), or to the end of the text when no blank line follows.

    Args:
        text: Plain text to search.

    Returns:
        The abstract slice of ``text``, or ``"Abstract not found."`` when
        the heading does not occur.
    """
    heading = _ABSTRACT_HEADING.search(text)
    if heading is None:
        return _NOT_FOUND_MESSAGE

    start = heading.start()
    end = text.find("\n\n", start)
    return text[start:] if end == -1 else text[start:end]


def process_pdf(pdf) -> str:
    """Extract the abstract from an uploaded PDF.

    Args:
        pdf: Either an object exposing the file path as ``.name`` or a plain
            path string. ``None`` (nothing uploaded) is tolerated.

    Returns:
        The abstract text beginning at the ``"abstract:"`` heading, or
        ``"Abstract not found."`` when there is no heading or no file.

    Raises:
        OSError: If the file cannot be opened.
    """
    if pdf is None:
        return _NOT_FOUND_MESSAGE

    # Fall back to the argument itself when it has no ``.name`` attribute,
    # so a plain path string works as well as a file wrapper.
    path = getattr(pdf, "name", pdf)

    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Join inside the ``with`` block: the file must still be open while
        # pages are read. ``or ""`` is a defensive guard in case a page
        # yields no text (None) instead of a string.
        text = "".join(page.extract_text() or "" for page in reader.pages)

    return _extract_abstract(text)
|
|
|
|
|
|
|
# Gradio UI: one PDF upload wired to process_pdf, with plain-text output.
# NOTE(review): `gr.inputs.File` looks like the legacy Gradio input namespace;
# confirm it still exists in the installed Gradio version before upgrading.
interface = gr.Interface(
    fn=process_pdf,
    inputs=gr.inputs.File(type="file", label="Upload PDF"),
    outputs="text",
    title="Summarizing outloud",
    description=(
        "Extract abstracts from PDFs, summarize them in 1 sentence "
        "and get an audio of it"
    ),
    # NOTE(review): presumably these example files sit next to this script;
    # verify they exist in the deployment.
    examples=[["example_pdf1.pdf"], ["example_pdf2.pdf"]],
)


if __name__ == "__main__":
    # Launch the app only when run as a script, not when imported.
    interface.launch()