Spaces:

Xuratron
/

abstract-speech-summarizer

Sleeping

App Files Community

Xuratron committed on Dec 9, 2023

Commit

1adb2ef

1 Parent(s): 0e0c910

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -47

app.py CHANGED Viewed

@@ -1,86 +1,124 @@
-#https://huggingface.co/spaces/Xuratron/abstract-speech-summarizer
 # Here are the imports
 import PyPDF2
 import re
 import torch
 from transformers import pipeline
-import soundfile as sf
 from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
 from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
 import gradio as gr
 # Here is the code
 def extract_and_clean_abstract(uploaded_file):
-    """
-    Extracts and cleans the abstract from the uploaded PDF file.
-    """
-    reader = PyPDF2.PdfReader(uploaded_file.file)
-    text = ""
-    for page in reader.pages:
-        text += page.extract_text() or ""
-    # Regular expression pattern to find the abstract
     pattern = r"(Abstract|ABSTRACT|abstract)(.*?)(Introduction|INTRODUCTION|introduction|1|Keywords|KEYWORDS|keywords)"
-    match = re.search(pattern, text, re.DOTALL)
     if match:
         abstract = match.group(2).strip()
     else:
-        abstract = "Abstract not found."
-    # Clean the abstract text
     cleaned_abstract = abstract.replace('\n', ' ').replace('- ', '')
     return cleaned_abstract
-def summarize_text(hf_model_name, text):
-    """
-    Summarizes the given text using a Hugging Face model.
-    """
-    summarizer = pipeline("summarization", model=hf_model_name)
-    summary = summarizer(text, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
-    return summary
 def text_to_speech(text):
-    """
-    Converts text to speech using a Hugging Face model.
-    """
     models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
-        "facebook/fastspeech2-en-ljspeech",
         arg_overrides={"vocoder": "hifigan", "fp16": False}
     )
-    model = models[0]
     TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
     generator = task.build_generator([model], cfg)
     sample = TTSHubInterface.get_model_input(task, text)
     wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
-    return wav, rate
-def process_pdf(uploaded_file, hf_model_name):
-    if uploaded_file.name.lower().endswith('.pdf'):
-        abstract = extract_and_clean_abstract(uploaded_file)
-        summary = summarize_text(hf_model_name, abstract)
-        wav, rate = text_to_speech(summary)
-        sf.write('/tmp/speech_output.wav', wav, rate)
-        return '/tmp/speech_output.wav'
-    else:
-        return "Error: Please upload a PDF file."
 iface = gr.Interface(
     fn=process_pdf,
-    inputs=[
-        gr.File(label="Upload PDF"),
-        gr.Textbox(label="Hugging Face Model Name for Summarization")
-    ],
     outputs=gr.Audio(label="Audio Summary"),
     title="PDF Abstract to Speech",
-    description="Extracts and summarizes the abstract from a PDF file and converts it to speech."
 )
-if __name__ == "__main__":
-    iface.launch()

 # Here are the imports
 import PyPDF2
 import re
 import torch
 from transformers import pipeline
 from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
 from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
 import gradio as gr
+import io
+import numpy as np
+import soundfile as sf
+import tempfile
 # Here is the code
# Locates the abstract body (group 1) between an "Abstract" heading and the
# next section marker. The end marker is one of:
#   * "Introduction", optionally preceded by its section number ("1." / "I."),
#   * "Keywords",
#   * a line that starts with section number 1 followed by a capitalised title.
# A bare "1" is deliberately NOT an end marker: it would cut the abstract at the
# first digit 1 it contains (e.g. inside "2019" or "10%").
_ABSTRACT_PATTERN = re.compile(
    r"(?i:abstract)"
    r"(.*?)"
    r"(?:"
    r"(?:\b(?:1|I)\.?\s+)?(?i:introduction)"
    r"|(?i:keywords)"
    r"|^[ \t]*1\.?[ \t]+[A-Z]"
    r")",
    re.DOTALL | re.MULTILINE,
)


# Function to extract and clean abstract from PDF
def extract_and_clean_abstract(uploaded_file):
    """Return the abstract of an uploaded PDF as a single cleaned line of text.

    Args:
        uploaded_file: The upload handed over by the Gradio ``File`` input.
            Either an object exposing the on-disk path as ``.name`` or a
            plain path; ``None`` when nothing was uploaded.

    Returns:
        The abstract with line breaks flattened to spaces and ``"- "``
        hyphenation residue removed. Returns ``"No file uploaded."`` when
        ``uploaded_file`` is ``None`` and ``"Abstract not found."`` when no
        abstract section can be located in the extracted text.
    """
    if uploaded_file is None:
        return "No file uploaded."

    # Fall back to the value itself when it has no `.name` (already a path).
    pdf_path = getattr(uploaded_file, "name", uploaded_file)

    with open(pdf_path, "rb") as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` keeps pages that yield no text from breaking the join.
        full_text = "".join(page.extract_text() or "" for page in reader.pages)

    match = _ABSTRACT_PATTERN.search(full_text)
    if match is None:
        return "Abstract not found."

    abstract = match.group(1).strip()
    # Flatten line breaks first so words hyphenated across lines ("exam-\nple")
    # become "exam- ple", which the second replace then rejoins.
    return abstract.replace("\n", " ").replace("- ", "")
# Lazily created summarization pipeline, shared by every call to summarize_text.
_SUMMARIZER = None


def _get_summarizer():
    """Build the summarization pipeline on first use and reuse it afterwards."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline(
            "summarization",
            "pszemraj/led-base-book-summary",
            # First GPU when CUDA is available, otherwise CPU (-1).
            device=0 if torch.cuda.is_available() else -1,
        )
    return _SUMMARIZER


# Function to summarize text
def summarize_text(text):
    """Summarize ``text`` and return only the first sentence of the summary.

    The pipeline is created once and cached at module level; constructing it
    loads the model, which is too expensive to repeat on every request.

    Args:
        text: The text to summarize.

    Returns:
        The leading segment of the generated summary, cut at the first
        whitespace that follows one of ``. : ; ! ?``.
    """
    summarizer = _get_summarizer()
    # Generate the summary
    result = summarizer(
        text,
        min_length=8,
        max_length=25,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=False,
        early_stopping=True,
    )
    # Extract the first sentence from the summary
    summary_text = result[0]['summary_text']
    first_sentence = re.split(r'(?<=[.:;!?])\s', summary_text)[0]
    return first_sentence
# Lazily created (task, model, generator, device) tuple shared by every call
# to text_to_speech.
_TTS_COMPONENTS = None


def _get_tts_components():
    """Load the TTS model, task and generator on first use and reuse them."""
    global _TTS_COMPONENTS
    if _TTS_COMPONENTS is None:
        # Check if CUDA is available and set the device accordingly
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Load the TTS model and task from Hugging Face Hub
        models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
            "facebook/fastspeech2-en-ljspeech",  # Or another TTS model of your choice
            arg_overrides={"vocoder": "hifigan", "fp16": False},
        )
        # Ensure the model is on the correct device
        model = models[0].to(device)
        # Update the config with the data config from the task
        TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
        # Build the generator
        generator = task.build_generator([model], cfg)
        _TTS_COMPONENTS = (task, model, generator, device)
    return _TTS_COMPONENTS


# Function for text-to-speech
def text_to_speech(text):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    The model, task and generator are loaded once and cached at module level
    instead of being reloaded on every request.

    Args:
        text: The text to speak.

    Returns:
        Path of a temporary ``.wav`` file holding the generated audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.
    """
    task, model, generator, device = _get_tts_components()

    # Get the model input from the text and move its tensors to the model's device
    sample = TTSHubInterface.get_model_input(task, text)
    net_input = sample["net_input"]
    net_input["src_tokens"] = net_input["src_tokens"].to(device)
    net_input["src_lengths"] = net_input["src_lengths"].to(device)

    # Generate the waveform
    wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)

    # Reserve a file name, then close the handle before writing: some platforms
    # refuse to open a named temporary file a second time while it is held open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        wav_path = tmp_file.name

    # `.cpu()` is a no-op for tensors already on the CPU, so no CUDA check is needed.
    sf.write(wav_path, wav.detach().cpu().numpy(), rate)
    return wav_path
def process_pdf(uploaded_file):
    """
    Process the uploaded PDF file to extract, summarize the abstract, and convert it to speech.

    When extraction yields a status message instead of an abstract (no file
    uploaded, or no abstract found), that message is spoken verbatim rather
    than being passed through the summarizer, which would distort it.

    Args:
        uploaded_file: The upload handed over by the Gradio ``File`` input,
            or ``None`` when nothing was uploaded.

    Returns:
        Path of the audio file produced by ``text_to_speech``.
    """
    abstract = extract_and_clean_abstract(uploaded_file)
    # Status strings returned by extract_and_clean_abstract; not real abstracts.
    if abstract in ("No file uploaded.", "Abstract not found."):
        return text_to_speech(abstract)
    summary = summarize_text(abstract)
    audio_output = text_to_speech(summary)
    return audio_output
# Build the Gradio UI: one PDF upload in, one audio player out, both wired
# to process_pdf.
pdf_input = gr.File(label="Upload PDF")
audio_output_player = gr.Audio(label="Audio Summary")
iface = gr.Interface(
    fn=process_pdf,
    inputs=pdf_input,
    outputs=audio_output_player,
    title="PDF Abstract to Speech",
    description="Upload a PDF file to extract its abstract, summarize it, and convert the summary to speech.",
)

# Start serving the app as soon as this module is executed.
iface.launch()