# Hugging Face Space: manuel-calzolari/assessment3_part2
# Status shown on the Space page: Runtime error
# File size: 3,928 Bytes

# https://huggingface.co/spaces/manuel-calzolari/assessment3_part2

# Import modules
import re
import fitz
import scipy
import torch
from transformers import pipeline
import gradio as gr

# Path of the WAV file that the generated speech is saved to.
# NOTE: this is a fixed relative path, so every synthesis() call writes to
# this same location.
TTS_AUDIO_PATH = "tts.wav"


def extract_text(pdf_path):
    """
    Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        The ``page.get_text()`` output of each page, concatenated in page
        order with no separator added between pages.
    """
    with fitz.open(pdf_path) as pdf:
        # Join once instead of growing a string page by page. The generator
        # is fully consumed here, while the document is still open.
        return "".join(page.get_text() for page in pdf)


# A line that contains only the abstract heading.
_ABSTRACT_HEADING_RE = re.compile(r"\n(?:Abstract|ABSTRACT)\n")
# Heuristic for the next title: a short line that starts with "1", an
# uppercase letter or "∗" and otherwise holds only letters, spaces and a few
# punctuation characters.
_NEXT_TITLE_RE = re.compile(r"\n[1A-Z∗][a-zA-Z @:,.{}]{,96}\n")


def get_abstract(text):
    """
    Get the abstract from the text extracted from a paper.

    Steps:
    - Drop everything up to and including the "Abstract"/"ABSTRACT" heading
      line.
    - Drop everything from the next title-like line onwards.
    - Re-join words hyphenated across line breaks and replace the remaining
      new lines with spaces.

    This is a heuristic: it works for some tested PDFs but may not work with
    every possible layout.

    Args:
        text: Full text of the document, with new lines preserved.

    Returns:
        The abstract as a single line of text.

    Raises:
        IndexError: If no line consisting only of "Abstract" or "ABSTRACT"
            is found.
    """
    parts = _ABSTRACT_HEADING_RE.split(text)
    if len(parts) < 2:
        # IndexError is kept as the exception type so that callers relying on
        # the failed list lookup of the heading split keep working.
        raise IndexError("abstract heading not found in the text")

    # Only the part before the first title-like line is needed, so stop
    # splitting after the first match.
    abstract = _NEXT_TITLE_RE.split(parts[1], maxsplit=1)[0]
    return abstract.replace("-\n", "").replace("\n", " ")


def generate_summary(abstract):
    """
    Summarize an abstract in one sentence.

    Uses a model trained to generate a one-line summary from the abstract of
    a paper. See: https://huggingface.co/snrspeaks/t5-one-line-summary

    Args:
        abstract: Text of the abstract to summarize.

    Returns:
        The generated summary, always ending with a full stop.
    """
    # Run on the first GPU when CUDA is available, otherwise on the CPU
    if torch.cuda.is_available():
        target_device = "cuda:0"
    else:
        target_device = "cpu"

    one_line_summarizer = pipeline(
        "summarization",
        model="snrspeaks/t5-one-line-summary",
        device=target_device,
    )

    # max_length is the maximum length of the summary
    results = one_line_summarizer(abstract, max_length=64)
    sentence = results[0]["summary_text"]

    # Append a full stop unless the summary already ends with one
    if not sentence.endswith("."):
        sentence = sentence + "."
    return sentence


def generate_speech(summary):
    """
    Turn the summary into speech with a text-to-speech model.

    See: https://huggingface.co/suno/bark-small

    Notes from the original author:
    1. Some PyTorch warnings are emitted, but the model seems to work.
    2. Sometimes (not always) the model adds spurious sounds or words,
       related to the provided text, at the end (or more rarely at the
       beginning) of the speech.

    Args:
        summary: Text to be read aloud.

    Returns:
        The output of the text-to-speech pipeline for ``summary``.
    """
    # Default to the CPU and switch to the first GPU when CUDA is available
    target_device = "cpu"
    if torch.cuda.is_available():
        target_device = "cuda:0"

    tts = pipeline(
        "text-to-speech",
        model="suno/bark-small",
        device=target_device,
    )

    # do_sample=True is taken from the bark-small usage example
    generation_options = {"do_sample": True}
    return tts(summary, forward_params=generation_options)


def save_speech(speech, audio_path):
    """
    Save the speech to a WAV file (adapted from the bark-small usage example).

    Args:
        speech: Mapping with a "sampling_rate" entry (sample rate of the
            audio) and an "audio" entry (array of samples that supports
            ``.T``).
        audio_path: Path of the WAV file to write.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to make ``scipy.io.wavfile`` reachable as an attribute on
    # every SciPy version.
    from scipy.io import wavfile

    wavfile.write(
        audio_path,
        rate=speech["sampling_rate"],
        data=speech["audio"].T,  # Transpose to get shape (n_samples, n_channels)
    )


def synthesis(pdf_path):
    """
    Summarize the abstract of a PDF in one sentence and read it aloud.

    This is the function wired into the Gradio interface.

    Args:
        pdf_path: Path of the uploaded PDF file.

    Returns:
        A ``(summary, audio_path)`` tuple. On success ``audio_path`` is
        ``TTS_AUDIO_PATH``, the WAV file with the spoken summary. When the
        text cannot be extracted or no abstract is found, the tuple is
        ``("ERROR: ABSTRACT NOT FOUND!!!", None)``.
    """
    not_found = ("ERROR: ABSTRACT NOT FOUND!!!", None)

    try:
        # Extract text from PDF
        text = extract_text(pdf_path)
        # Get the abstract
        abstract = get_abstract(text)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        return not_found

    # An empty abstract gives the models nothing to work on
    if not abstract.strip():
        return not_found

    # Generate the summary
    summary = generate_summary(abstract)
    # Generate the speech of the summary
    speech = generate_speech(summary)
    # Save the speech to a file
    save_speech(speech, TTS_AUDIO_PATH)

    return summary, TTS_AUDIO_PATH


# Build the app: one PDF upload as input, the summary text and its audio as
# outputs, plus a few example PDFs
_pdf_input = gr.File(
    type="filepath",
    file_types=[".pdf"],
    label="Upload PDF with abstract",
)
_summary_outputs = [gr.Text(label="Summary"), gr.Audio(label="Summary TTS")]
_example_pdfs = [
    "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
    "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
    "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
]

summary_tts = gr.Interface(
    fn=synthesis,
    inputs=_pdf_input,
    outputs=_summary_outputs,
    title="PDF voice abstract summarization",
    description="Upload a PDF with an abstract and this app will summarize the abstract in one sentence and read the summary aloud.",
    examples=_example_pdfs,
)

# Launch the app
summary_tts.launch()