# my space: https://huggingface.co/spaces/vividsd/practice
# I tried to use my previous code but with some adaptations so it works with any PDF that contains an abstract

# imports
import PyPDF2
from transformers import pipeline
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
from datasets import load_dataset
import torch
from transformers import SpeechT5HifiGan
import gradio as gr
# Now copying my code and adapting it for any PDF
def extract_abstract(pdf_file_path):
    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        text = reader.pages[0].extract_text()

    # In order to extract the exact part of the first page that is useful to me,
    # I rely on the papers following a pattern in which the Abstract is followed by an Introduction,
    # and cut the text right before the Introduction
    abstract_start_index = text.find('Abstract')
    introduction_start_index = text.find('Introduction')
    if abstract_start_index == -1 or introduction_start_index == -1:
        return ""  # Abstract or Introduction section not found
    abstract = text[abstract_start_index + len('Abstract'):introduction_start_index].strip()
    return abstract
# The summarization pipeline is loaded once at module level so it is reused across requests
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
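
# For reference, the summarization pipeline returns a list of dicts (illustrative output):
#   summarizer(abstract_text, max_length=26, min_length=10, do_sample=False)
#   -> [{'summary_text': 'A one-sentence summary of the abstract.'}]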
# proceeding to the audio function
def audio(pdf_file_path):
    # extract the abstract from the uploaded PDF and summarize it into one short sentence
    abstract_text = extract_abstract(pdf_file_path)
    if not abstract_text:
        raise gr.Error("Could not find an Abstract followed by an Introduction in this PDF.")
    output = summarizer(abstract_text, max_length=26, min_length=10, do_sample=False)
    summary = output[0]['summary_text']

    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    inputs = processor(text=summary, return_tensors="pt")

    # speaker embedding (x-vector) taken from the CMU ARCTIC x-vectors dataset
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

    # generate_speech with a vocoder returns the waveform directly,
    # so no separate spectrogram + vocoder pass is needed
    with torch.no_grad():
        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # gr.Audio accepts a (sample_rate, numpy_array) tuple; SpeechT5 produces 16 kHz audio
    return (16000, speech.numpy())
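
# Local smoke test with a hypothetical file name (commented out so the Space only serves the UI):
#   sr, wav = audio("example_paper.pdf")
#   # wav is a 1-D numpy array sampled at 16 kHz, playable by gr.Audio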
# Creating the Gradio app
# type="filepath" makes Gradio hand the uploaded file to fn as a path string
input_component = gr.File(file_types=[".pdf"], type="filepath")
output_component = gr.Audio()

demo = gr.Interface(
    fn=audio,
    inputs=input_component,
    outputs=output_component,
    title="Reading your abstract summary out loud",
    description="Upload a PDF that contains an Abstract. The abstract is summarized in one sentence and read out loud. Only PDFs in which the Abstract section is followed by an Introduction section are supported.",
)
demo.launch()