# Hugging Face Space (page status when captured: Sleeping)
# Source: https://huggingface.co/spaces/Antoniskaraolis/AI_Audio_Processing
# Imports
import gradio as gr | |
from PyPDF2 import PdfReader | |
from transformers import pipeline | |
from gtts import gTTS | |
import tempfile | |
import os | |
# Application code
def extract_abstract_from_pdf(file):
    """Return the text of the first page of a PDF.

    The first page is used as a stand-in for the abstract; no attempt is
    made to locate an actual "Abstract" heading.

    Args:
        file: Either an object exposing the PDF's filesystem path as
            ``.name`` (what the original code required) or the path itself.

    Returns:
        The text extracted from the first page, or an empty string when the
        document has no pages or the page yields no text.

    Raises:
        ValueError: If ``file`` is ``None`` (nothing was provided).
    """
    if file is None:
        raise ValueError("No PDF file was provided.")

    # Accept both upload objects carrying a ``.name`` path and plain paths.
    pdf_path = getattr(file, "name", file)
    reader = PdfReader(pdf_path)

    # A PDF without pages would otherwise fail with IndexError on pages[0].
    if len(reader.pages) == 0:
        return ""

    # Normalise a missing extraction result to an empty string.
    return reader.pages[0].extract_text() or ""
# Summarization pipelines keyed by model name, so each model is loaded only
# once per process instead of on every request.
_SUMMARIZERS = {}


def summarize_text(text, model_name='sshleifer/distilbart-cnn-12-6'):
    """Summarize *text* and return the first sentence of the summary.

    Args:
        text: The text to summarize.
        model_name: Name of the summarization model passed to
            ``transformers.pipeline``.

    Returns:
        Everything in the generated summary up to the first ``'.'``, with a
        trailing ``'.'`` appended. Note that the split is on the first period
        character, so a decimal or abbreviation also ends the "sentence".

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("Cannot summarize empty text.")

    summarizer = _SUMMARIZERS.get(model_name)
    if summarizer is None:
        summarizer = pipeline("summarization", model=model_name)
        _SUMMARIZERS[model_name] = summarizer

    # truncation=True keeps inputs longer than the model's maximum input
    # length from failing; the excess text is simply cut off.
    summary = summarizer(
        text,
        max_length=75,
        min_length=20,
        do_sample=False,
        truncation=True,
    )

    # Strip surrounding whitespace so the spoken sentence starts cleanly.
    summary_text = summary[0]['summary_text'].strip()
    first_sentence = summary_text.split('.')[0] + '.'
    return first_sentence
def process_pdf_to_speech(file):
    """Summarize the first page of an uploaded PDF and synthesize it as MP3.

    Args:
        file: The uploaded PDF as delivered by the ``gr.File`` input.

    Returns:
        Path of a temporary ``.mp3`` file containing the spoken summary. The
        file is created with ``delete=False`` and is not removed here; the
        caller owns it.

    Raises:
        gr.Error: If no file was uploaded or no text could be extracted from
            the first page.
    """
    if file is None:
        raise gr.Error("Please upload a PDF file.")

    abstract = extract_abstract_from_pdf(file)
    if not abstract or not abstract.strip():
        raise gr.Error(
            "No text could be extracted from the first page of the PDF."
        )

    summary = summarize_text(abstract)
    tts = gTTS(text=summary, lang='en')

    # Reserve a unique path, then close the handle before gTTS writes to it:
    # reopening a still-open NamedTemporaryFile by name fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as fp:
        audio_path = fp.name

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path
# Web UI: a single PDF upload as input and a single audio player as output,
# wired to process_pdf_to_speech.
iface = gr.Interface(
    process_pdf_to_speech,
    gr.File(label="Upload PDF"),
    gr.Audio(label="Play Summary"),
    description="This app summarizes the abstract from a PDF and converts it to speech. Please upload a PDF with an abstract.",
    title="PDF Abstract to Speech",
)

# Launch the interface as soon as this module is executed.
iface.launch()