File size: 3,928 Bytes
1b6419e
 
 
 
 
 
 
 
1f89864
 
1b6419e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f89864
1b6419e
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# https://huggingface.co/spaces/manuel-calzolari/assessment3_part2

# Import modules
import functools
import re

import fitz
import gradio as gr
import scipy
import torch
from transformers import pipeline

# Path of the WAV file the generated speech is saved to.
# synthesis() writes to this same path on every call and returns it.
TTS_AUDIO_PATH = "tts.wav"


def extract_text(pdf_path):
    """
    Function to extract text from PDF.

    Opens the document at `pdf_path` with PyMuPDF (`fitz`) and returns the
    text of every page, in page order, joined into a single string.
    The document is closed when the function returns.
    """
    with fitz.open(pdf_path) as pdf:
        # Join once instead of growing a string with += page after page
        return "".join(page.get_text() for page in pdf)


# A line made only of the abstract heading
_ABSTRACT_HEADING_RE = re.compile(r"\n(?:Abstract|ABSTRACT)\n")
# A short line that looks like the title following the abstract
_NEXT_TITLE_RE = re.compile(r"\n[1A-Z∗][a-zA-Z @:,.{}]{,96}\n")


def get_abstract(text):
    """
    Function to get the abstract:
    - Remove the text before the abstract
    - Remove the text after the abstract (after the next title)
    - Remove new lines

    This works for some tested PDFs but obviously may not work with every
    possible layout.

    Returns the abstract as a single line of text (words hyphenated across a
    line break are re-joined, remaining line breaks become spaces).

    Raises IndexError if no "Abstract" / "ABSTRACT" heading line is found.
    """
    # Only the first heading matters, so stop splitting after it
    parts = _ABSTRACT_HEADING_RE.split(text, maxsplit=1)
    if len(parts) < 2:
        # Same exception type as indexing the missing element would raise,
        # but with a message that says what actually went wrong
        raise IndexError("no 'Abstract' heading found in the text")

    # Keep only what comes before the next title-like line (if any)
    abstract = _NEXT_TITLE_RE.split(parts[1], maxsplit=1)[0]

    # Re-join words split by a hyphen at the end of a line, then flatten
    return abstract.replace("-\n", "").replace("\n", " ")


@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """
    Build the summarization pipeline once and reuse it on later calls, so the
    model is not loaded again for every request.
    """
    return pipeline(
        "summarization",
        model="snrspeaks/t5-one-line-summary",
        device="cuda:0" if torch.cuda.is_available() else "cpu",
    )


def generate_summary(abstract):
    """
    Function to generate the summary.
    Use a model trained to generate one-line summary based on abstract of papers
    See: https://huggingface.co/snrspeaks/t5-one-line-summary

    Takes the abstract text and returns the summary as a string that always
    ends with sentence-final punctuation.
    """
    summarizer = _load_summarizer()

    summary = summarizer(
        abstract,
        max_length=64,  # Maximum length of the summary
    )[0]["summary_text"]

    # Add a full stop only when the summary does not already end a sentence
    # (avoids results such as "Is this useful?.")
    if not summary.endswith((".", "!", "?")):
        summary += "."

    return summary


@functools.lru_cache(maxsize=1)
def _load_synthesiser():
    """
    Build the text-to-speech pipeline once and reuse it on later calls, so the
    model is not loaded again for every request.
    """
    return pipeline(
        "text-to-speech",
        model="suno/bark-small",
        device="cuda:0" if torch.cuda.is_available() else "cpu",
    )


def generate_speech(summary):
    """
    Function to generate the speech (TTS model).
    See: https://huggingface.co/suno/bark-small
    Note 1: I get some PyTorch warnings but it seems to work.
    Note 2: Sometimes (not always) this TTS model adds spurious sounds or words
    at the end (or more rarely at the beginning) of the speech related to the
    text being provided.

    Takes the text to read and returns the pipeline output unchanged;
    save_speech() reads its "audio" and "sampling_rate" entries.
    """
    synthesiser = _load_synthesiser()

    return synthesiser(
        summary,
        forward_params={"do_sample": True},  # From the bark-small usage example
    )


def save_speech(speech, audio_path):
    """
    Function to save the speech to a WAV file (from the bark-small usage example)

    `speech` must provide the entries "sampling_rate" and "audio";
    `audio_path` is the path of the WAV file to write.
    """
    # Import the submodule explicitly instead of relying on the bare
    # `import scipy` having made `scipy.io.wavfile` available
    from scipy.io import wavfile

    wavfile.write(
        audio_path,
        rate=speech["sampling_rate"],
        data=speech["audio"].T,  # Transpose to get shape (n_samples, n_channels)
    )


def synthesis(pdf_path):
    """
    Function called by the app: summarize the abstract of a PDF and read the
    summary aloud.

    Returns a tuple (summary, audio_path). If the text cannot be extracted or
    the abstract cannot be located, returns the error message
    "ERROR: ABSTRACT NOT FOUND!!!" and None instead of an audio path.

    Note: the audio is always written to TTS_AUDIO_PATH, so each call
    overwrites the file produced by the previous one.
    """
    try:
        # Extract text from PDF
        text = extract_text(pdf_path)
        # Get the abstract
        abstract = get_abstract(text)
    except Exception:
        # Not a bare except: KeyboardInterrupt / SystemExit must still propagate
        return "ERROR: ABSTRACT NOT FOUND!!!", None
    # Generate the summary
    summary = generate_summary(abstract)
    # Generate the speech of the summary
    speech = generate_speech(summary)
    # Save the speech to a file
    save_speech(speech, TTS_AUDIO_PATH)

    return summary, TTS_AUDIO_PATH


# Input component: a single uploaded PDF, handed to synthesis() as a file path
_pdf_input = gr.File(
    type="filepath",
    file_types=[".pdf"],
    label="Upload PDF with abstract",
)

# Output components: the one-sentence summary and its spoken version
_summary_outputs = [
    gr.Text(label="Summary"),
    gr.Audio(label="Summary TTS"),
]

# PDFs offered as ready-made examples in the UI
_example_pdfs = [
    "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf",
    "Article 7 Efficient Estimation of Word Representations in Vector Space.pdf",
    "Article 5 A Comprehensive Survey on Applications of Transformers for Deep Learning Tasks.pdf",
]

# Assemble the interface around synthesis() and start serving it
summary_tts = gr.Interface(
    fn=synthesis,
    inputs=_pdf_input,
    outputs=_summary_outputs,
    title="PDF voice abstract summarization",
    description="Upload a PDF with an abstract and this app will summarize the abstract in one sentence and read the summary aloud.",
    examples=_example_pdfs,
)
summary_tts.launch()