practice / app.py
vividsd's picture
Update app.py
d6d00d9
raw
history blame
2.92 kB
import gradio as gr
from transformers import pipeline
from tempfile import NamedTemporaryFile
from PyPDF2 import PdfReader
from IPython.display import Audio
import numpy as np
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
import torch
# Lazily created summarization pipeline, shared across calls so the model is
# loaded once per process instead of on every request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            device=0 if torch.cuda.is_available() else -1,
        )
    return _SUMMARIZER


def _extract_abstract_text(page_text):
    """Return the lines between an 'abstract' heading and the introduction.

    A line whose stripped, lower-cased content equals ``abstract`` starts the
    capture; a line containing both ``1`` and ``introduction`` stops it.
    Neither heading line is included in the result.
    """
    collected = []
    inside_abstract = False
    for line in page_text.splitlines():
        normalized = line.strip().lower()
        if normalized == "abstract":
            inside_abstract = True
            # Skip the heading itself rather than stripping the word
            # "Abstract" from the whole text afterwards.
            continue
        if "1" in normalized and "introduction" in normalized:
            inside_abstract = False
        if inside_abstract:
            collected.append(line.strip())
    # splitlines() drops the line breaks, so join with a space to keep the
    # last word of one line from fusing with the first word of the next.
    return " ".join(part for part in collected if part)


def summarize_abstract_from_pdf(pdf_file_path):
    """Summarize the abstract found on the first page of a PDF.

    The first page's text is extracted, the abstract section is isolated,
    and the summarization model is applied twice: once with a 150-token
    limit and again on that output with a 25-token limit.

    Args:
        pdf_file_path: Path to the PDF file to read.

    Returns:
        The final summary text as a string.

    Raises:
        ValueError: If the PDF has no pages or no abstract section is found
            on the first page.
    """
    with open(pdf_file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        if len(reader.pages) == 0:
            raise ValueError("The PDF has no pages.")
        # Only the first page is inspected.
        first_page_text = reader.pages[0].extract_text() or ""

    abstract_text = _extract_abstract_text(first_page_text)
    if not abstract_text:
        raise ValueError(
            "No 'Abstract' section was found on the first page of the PDF."
        )

    summarizer = _get_summarizer()

    # Generation settings shared by both passes; only max_length differs.
    generation_options = {
        "min_length": 16,
        "no_repeat_ngram_size": 3,
        "encoder_no_repeat_ngram_size": 3,
        "repetition_penalty": 3.5,
        "num_beams": 4,
        "early_stopping": True,
    }

    first_pass = summarizer(abstract_text, max_length=150, **generation_options)

    # Summarize the summary to shorten it further.
    second_pass = summarizer(
        first_pass[0]["summary_text"],
        max_length=25,
        **generation_options,
    )

    return second_pass[0]["summary_text"]
def generate_audio_func(pdf_file):
    """Summarize the abstract of an uploaded PDF and render it as speech.

    Args:
        pdf_file: The uploaded file as handed over by the Gradio File
            component: either an object exposing the path as ``.name`` or
            (presumably, on Gradio versions that pass file paths) a plain
            path string.

    Returns:
        Path to a temporary ``.wav`` file containing the spoken summary. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If no file was provided.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file first.")

    # Accept both a file wrapper (path in .name) and a plain path string.
    pdf_file_path = getattr(pdf_file, "name", pdf_file)

    # Summarize the abstract, then synthesize speech from the summary.
    text_prompt = summarize_abstract_from_pdf(pdf_file_path)
    audio_array = generate_audio(text_prompt)

    # Clip before scaling so out-of-range samples saturate instead of
    # wrapping around when cast to 16-bit integers.
    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

    # Reserve a temp file name, and close the handle before writing so the
    # path can be reopened by write_wav on every platform.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name

    # Use Bark's own sample rate; a mismatched rate in the WAV header makes
    # playback run at the wrong speed and pitch.
    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
    return wav_file_path
# Create the Gradio app: a PDF upload as input, an audio player as output.
# File-type filters are given as extensions, hence the leading dot.
input_component = gr.File(file_types=[".pdf"])
output_component = gr.Audio()

demo = gr.Interface(
    fn=generate_audio_func,
    inputs=input_component,
    outputs=output_component,
    title="Reading your abstract summary outloud",
    description="Upload a PDF that contains an Abstract. Get your abstract summarized in 1 sentence and read outloud. We only accept with PDfs that contains the section Abstract",
)

# Start the web server for the interface.
demo.launch()