Spaces:

vividsd
/

practice

Build error

App Files Files Community

practice / app.py

vividsd

Update app.py

d6d00d9 over 1 year ago

raw

history blame

2.92 kB

	import gradio as gr
	from transformers import pipeline
	from tempfile import NamedTemporaryFile
	from PyPDF2 import PdfReader
	from IPython.display import Audio
	import numpy as np
	from bark import SAMPLE_RATE, generate_audio, preload_models
	from scipy.io.wavfile import write as write_wav
	import torch

	_SUMMARIZER = None  # summarization pipeline, built lazily and reused across calls


	def _get_summarizer():
	    """Return the shared summarization pipeline, creating it on first use.

	    Building the pipeline loads the model, so it is done once per process
	    rather than once per request.
	    """
	    global _SUMMARIZER
	    if _SUMMARIZER is None:
	        _SUMMARIZER = pipeline(
	            "summarization",
	            "pszemraj/led-base-book-summary",
	            device=0 if torch.cuda.is_available() else -1,
	        )
	    return _SUMMARIZER


	def _extract_abstract(page_text):
	    """Return the abstract found in ``page_text`` as a single line of text.

	    Capturing starts after a line that consists solely of the word
	    "abstract" (any letter case) and stops at a line that contains both
	    "1" and "introduction" (e.g. "1 Introduction" or "1. Introduction").
	    Returns an empty string when no abstract heading is found.
	    """
	    collected = []
	    in_abstract = False
	    for line in page_text.splitlines():
	        stripped = line.strip()
	        lowered = stripped.lower()
	        if lowered == "abstract":
	            in_abstract = True
	            # Skip the heading itself instead of stripping the word
	            # "Abstract" from the whole text afterwards, which would also
	            # remove it from the abstract's own sentences.
	            continue
	        if "1" in lowered and "introduction" in lowered:
	            in_abstract = False
	        if in_abstract and stripped:
	            collected.append(stripped)
	    # splitlines() drops the line breaks, so join with a space; otherwise
	    # the last word of each line is glued to the first word of the next.
	    return " ".join(collected)


	def summarize_abstract_from_pdf(pdf_file_path):
	    """Summarize the abstract on the first page of a PDF into a short text.

	    Args:
	        pdf_file_path: Path of the PDF file to read.

	    Returns:
	        The ``summary_text`` string produced by the second summarization
	        pass.

	    Raises:
	        ValueError: If the PDF has no pages, or no abstract section could
	            be found on its first page.
	    """
	    # Only the first page is inspected for the abstract.
	    with open(pdf_file_path, 'rb') as pdf_file:
	        reader = PdfReader(pdf_file)
	        if len(reader.pages) == 0:
	            raise ValueError("The PDF does not contain any pages.")
	        first_page_text = reader.pages[0].extract_text() or ""

	    abstract_text = _extract_abstract(first_page_text)
	    if not abstract_text:
	        raise ValueError(
	            "No 'Abstract' section was found on the first page of the PDF."
	        )

	    summarizer = _get_summarizer()
	    # Generation settings shared by both passes.
	    generation_options = dict(
	        min_length=16,
	        no_repeat_ngram_size=3,
	        encoder_no_repeat_ngram_size=3,
	        repetition_penalty=3.5,
	        num_beams=4,
	        early_stopping=True,
	    )

	    # First pass: condense the abstract (max_length=150).
	    first_pass = summarizer(abstract_text, max_length=150, **generation_options)
	    # Second pass: summarize the summary again with a much tighter length
	    # limit (max_length=25) to get a very short result.
	    second_pass = summarizer(
	        first_pass[0]['summary_text'],
	        max_length=25,
	        **generation_options,
	    )

	    # Return the summarized abstract as a string
	    return second_pass[0]['summary_text']

	def generate_audio_func(pdf_file):
	    """Gradio handler: summarize an uploaded PDF's abstract and speak it.

	    Args:
	        pdf_file: The value supplied by the ``gr.File`` input. Either an
	            object exposing the file path as ``.name`` or a plain path
	            string is accepted.

	    Returns:
	        Path of a temporary ``.wav`` file holding the generated speech.
	        The file is created with ``delete=False``, so it is not removed
	        automatically.

	    Raises:
	        gr.Error: If no file was uploaded.
	    """
	    if pdf_file is None:
	        raise gr.Error("Please upload a PDF file first.")
	    # Fall back to the value itself when it has no ``.name`` attribute,
	    # i.e. when a path string is passed instead of a file object.
	    pdf_file_path = getattr(pdf_file, "name", pdf_file)

	    # Summarize the abstract, then generate audio from that text.
	    text_prompt = summarize_abstract_from_pdf(pdf_file_path)
	    audio_array = generate_audio(text_prompt)

	    # Convert to 16-bit PCM. Assumes float samples nominally in [-1, 1]
	    # (implied by the 32767 scaling) - TODO confirm. Clipping first
	    # prevents out-of-range samples from wrapping around in the int16 cast.
	    pcm_samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

	    # Reserve a temporary file name, then write after the handle is closed
	    # so the path is not opened twice at the same time.
	    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
	        wav_file_path = temp_wav_file.name
	    # Use the sample rate exported by bark rather than a hard-coded value,
	    # so the WAV header matches the rate the audio was generated at.
	    write_wav(wav_file_path, SAMPLE_RATE, pcm_samples)
	    return wav_file_path

	# Create the Gradio app: a single PDF upload as input, audio as output.
	# File-extension filters need the leading dot (".pdf").
	input_component = gr.File(file_types=[".pdf"])
	output_component = gr.Audio()

	# generate_audio_func returns the path of a WAV file, which the audio
	# output component receives as its value.
	demo = gr.Interface(
	    fn=generate_audio_func,
	    inputs=input_component,
	    outputs=output_component,
	    title="Reading your abstract summary outloud",
	    description="Upload a PDF that contains an Abstract. Get your abstract summarized in 1 sentence and read outloud. We only accept with PDfs that contains the section Abstract",
	)  # this closing parenthesis was missing, which made the file a SyntaxError

	demo.launch()