Spaces:

vividsd
/

practice

Build error

File size: 2,950 Bytes

3075f85
 
96ebfa6
 
 
 
 
 
3075f85
 
96ebfa6
 
 
 
 
3075f85
96ebfa6
 
 
3075f85
96ebfa6

import gradio as gr
from transformers import pipeline
from tempfile import NamedTemporaryFile
from PyPDF2 import PdfReader
from IPython.display import Audio
import numpy as np
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write as write_wav
import torch

def summarize_abstract_from_pdf(pdf_file_path):
    abstract_string = 'abstract'
    found_abstract = False
    intro_string ='introduction'
    extracted_text_string =""

    # Read the PDF and extract text from the first page
    with open(pdf_file_path, 'rb') as pdf_file:
        reader = PdfReader(pdf_file)
        text = ""
        text += reader.pages[0].extract_text()


    file = text.splitlines()
    for lines in file:
      lower_lines = lines.lower()
      if lower_lines.strip()== abstract_string:
        found_abstract = True
      elif "1" in lower_lines.strip() and intro_string in lower_lines.strip():
        found_abstract = False

      if found_abstract == True:
        extracted_text_string += lines


    extracted_text_string = extracted_text_string.replace("Abstract", "")
    summarizer = pipeline("summarization", "pszemraj/led-base-book-summary",device=0 if torch.cuda.is_available() else -1,)
    # Generate a summarized abstract using the specified model
    summarized_abstract = summarizer(extracted_text_string,
    min_length=16,
    max_length=150,
    no_repeat_ngram_size=3,
    encoder_no_repeat_ngram_size=3,
    repetition_penalty=3.5,
    num_beams=4,
    early_stopping=True,
    )
    #I run this twice to get summazired text
    summarized_abstract2 = summarizer(summarized_abstract[0]['summary_text'],
    min_length=16,
    max_length=25,
    no_repeat_ngram_size=3,
    encoder_no_repeat_ngram_size=3,
    repetition_penalty=3.5,
    num_beams=4,
    early_stopping=True,
    )



    # Return the summarized abstract as a string
    return summarized_abstract2[0]['summary_text']

def generate_audio_func(pdf_file):
    
    pdf_file_path = pdf_file.name
  # Generate audio from text
  #call the summarize abstract function
    text_prompt =  summarize_abstract_from_pdf(pdf_file_path)
    audio_array = generate_audio(text_prompt)
    
  # Create a temporary WAV file to save the audio
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
        write_wav(wav_file_path, 22050, (audio_array * 32767).astype(np.int16))
    return wav_file_path

    

# Define app name, app description, and examples
app_name = "PDF to Audio Converter"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files with abstracts."

# Create the Gradio app
input_component = gr.File(file_types=["pdf"])
output_component = gr.Audio()

demo = gr.Interface(
    fn=generate_audio_func,
    inputs=input_component,
    outputs=output_component,
    title=app_name,
    description=app_description
)

demo.launch()