|
|
|
|
|
|
|
import gradio as gr |
|
from PyPDF2 import PdfReader |
|
from transformers import pipeline |
|
from gtts import gTTS |
|
import tempfile |
|
import os |
|
|
|
|
|
def extract_abstract_from_pdf(file):
    """Return the text of the first page of a PDF.

    The first page is used as a stand-in for the abstract; no attempt is
    made to locate an actual "Abstract" heading.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object that exposes the path as ``.name`` (for example a
            temporary-file wrapper handed over by an upload widget).

    Returns:
        The text extracted from page 0, or ``""`` if extraction yields
        nothing.

    Raises:
        ValueError: If ``file`` is ``None`` or the PDF contains no pages.
    """
    if file is None:
        raise ValueError("No PDF file was provided.")

    # Check for path-like input first: pathlib.Path also has a ``.name``
    # attribute, but it holds only the final path component.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    reader = PdfReader(path)
    if len(reader.pages) == 0:
        raise ValueError("The PDF contains no pages.")

    # Normalise a falsy extraction result to an empty string.
    return reader.pages[0].extract_text() or ""
|
|
|
# Summarization pipelines keyed by model name, so each model is loaded at
# most once per process instead of on every request.
_SUMMARIZER_CACHE = {}


def _get_summarizer(model_name):
    """Return the summarization pipeline for *model_name*, loading it on first use."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = pipeline("summarization", model=model_name)
    return _SUMMARIZER_CACHE[model_name]


def summarize_text(text, model_name='sshleifer/distilbart-cnn-12-6'):
    """Summarize *text* and return the first sentence of the summary.

    Args:
        text: The text to summarize.
        model_name: Name of the summarization model passed to
            ``transformers.pipeline``.

    Returns:
        Everything in the generated summary up to the first ``'.'``, with a
        trailing ``'.'`` appended.

    Raises:
        ValueError: If ``text`` is ``None``, empty, or whitespace only.
    """
    if text is None or not text.strip():
        raise ValueError("Cannot summarize empty text.")

    summarizer = _get_summarizer(model_name)
    # truncation=True keeps over-long input (a dense first page) within the
    # model's maximum input length instead of failing inside the model.
    summary = summarizer(
        text,
        max_length=75,
        min_length=20,
        do_sample=False,
        truncation=True,
    )

    summary_text = summary[0]['summary_text'].strip()
    first_sentence = summary_text.split('.')[0] + '.'
    return first_sentence
|
|
|
def process_pdf_to_speech(file):
    """Turn an uploaded PDF into an MP3 file that speaks a one-sentence summary.

    Pipeline: extract the first page's text, summarize it, synthesize the
    summary with gTTS, and write the audio to a temporary ``.mp3`` file.

    Args:
        file: The uploaded PDF, as accepted by ``extract_abstract_from_pdf``.

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so it is not removed automatically.
    """
    abstract = extract_abstract_from_pdf(file)
    summary = summarize_text(abstract)
    tts = gTTS(text=summary, lang='en')

    # Reserve a unique path, then close the handle before gTTS writes to the
    # file by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as fp:
        audio_path = fp.name

    try:
        tts.save(audio_path)
    except Exception:
        # Synthesis failed: do not leave an orphaned temp file behind.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise

    return audio_path
|
|
|
# Gradio UI: a single PDF upload goes in, a playable audio clip comes out.
pdf_input = gr.File(label="Upload PDF")
audio_output = gr.Audio(label="Play Summary")

iface = gr.Interface(
    fn=process_pdf_to_speech,
    inputs=pdf_input,
    outputs=audio_output,
    title="PDF Abstract to Speech",
    description="This app summarizes the abstract from a PDF and converts it to speech. Please upload a PDF with an abstract.",
)

# Start the web server for the interface.
iface.launch()