Antoniskaraolis's picture
Update app.py
8c96cb8
raw
history blame
1.21 kB
import gradio as gr
from PyPDF2 import PdfReader
from transformers import pipeline
from gtts import gTTS
import tempfile
import os
def extract_abstract_from_pdf(file) -> str:
    """Return the text extracted from the first page of a PDF.

    Despite the name, no abstract detection is performed: the whole first
    page is returned and the caller is expected to summarize it.

    Args:
        file: Either an uploaded-file object exposing the on-disk path as
            ``.name`` (what this app's file input hands over), or a plain
            filesystem path.

    Returns:
        The text of the first page as reported by ``PdfReader``.

    Raises:
        ValueError: If ``file`` is ``None`` (e.g. nothing was uploaded).
    """
    if file is None:
        # Fail with a readable message instead of the opaque
        # "'NoneType' object has no attribute 'name'".
        raise ValueError("No PDF file was provided.")

    # Accept both an upload wrapper (path stored in ``.name``) and a bare path.
    pdf_path = getattr(file, "name", file)

    reader = PdfReader(pdf_path)
    first_page = reader.pages[0]
    return first_page.extract_text()
# Loaded summarization pipelines keyed by model name, so each model is loaded
# once per process instead of once per request.
_SUMMARIZER_CACHE = {}


def _get_summarizer(model_name):
    """Return the summarization pipeline for *model_name*, loading it on first use."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = pipeline("summarization", model=model_name)
    return _SUMMARIZER_CACHE[model_name]


def _first_sentence(summary):
    """Return the first sentence of *summary*, always terminated by a period."""
    import re

    stripped = summary.strip()
    # Split only on a period that is followed by whitespace, so decimals such
    # as "95.3%" are not mistaken for a sentence end. Abbreviations followed
    # by a space (e.g. "e.g. ") will still split; that limitation is accepted.
    head = re.split(r'(?<=\.)\s+', stripped, maxsplit=1)[0]
    return head if head.endswith('.') else head + '.'


def summarize_text(text, model_name='sshleifer/distilbart-cnn-12-6'):
    """Summarize *text* and return only the first sentence of the summary.

    Args:
        text: The text to summarize.
        model_name: Name of the summarization model passed to ``pipeline``.

    Returns:
        The first sentence of the generated summary, ending with a period.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Summarizing nothing would only yield meaningless model output.
        raise ValueError("No text to summarize.")

    summarizer = _get_summarizer(model_name)
    # truncation=True clips over-long input to the model's maximum length
    # instead of failing on a long page of text.
    summary = summarizer(
        text,
        max_length=75,
        min_length=20,
        do_sample=False,
        truncation=True,
    )
    return _first_sentence(summary[0]['summary_text'])
def process_pdf_to_speech(file):
    """Turn an uploaded PDF into a spoken one-sentence summary.

    The first page of the PDF is extracted, summarized, and synthesized to
    English speech with gTTS.

    Args:
        file: The uploaded PDF, as accepted by ``extract_abstract_from_pdf``.

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False`` and is not removed by this function.
    """
    abstract = extract_abstract_from_pdf(file)
    summary = summarize_text(abstract)
    tts = gTTS(text=summary, lang='en')

    # Reserve a unique path, then close the handle *before* gTTS writes to it:
    # reopening a NamedTemporaryFile by name while it is still open is not
    # possible on every platform (notably Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as fp:
        audio_path = fp.name

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(audio_path)
        raise
    return audio_path
# Gradio UI: one PDF upload in, one audio player out.
iface = gr.Interface(
    fn=process_pdf_to_speech,
    inputs=gr.File(label="Upload PDF"),
    # Use the top-level Audio component; the legacy ``gr.outputs`` namespace is
    # deprecated and absent from newer Gradio releases. The handler returns a
    # path to an .mp3 file, hence type="filepath".
    outputs=gr.Audio(type="filepath", label="Play Summary"),
    title="PDF Abstract to Speech",
    description="This app summarizes the abstract from a PDF and converts it to speech. Please upload a PDF with an abstract."
)

# Start the web server as soon as the script is run.
iface.launch()