File size: 2,021 Bytes
26ce009
 
 
 
 
 
d7d4c89
26ce009
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114ba9b
ac5b1bb
2e59416
38d02f2
26ce009
 
aff5bd1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from gtts import gTTS
from io import BytesIO
import re
import os

# Hugging Face checkpoint used for summarization. The model and its tokenizer
# are loaded once, at import time, and reused by summarize_pdf_abstract().
model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

def extract_first_sentence(text):
    """Return the first sentence of *text*.

    A sentence boundary is a whitespace character that directly follows
    ``.``, ``?`` or ``!``. Two cases are deliberately not treated as
    boundaries: dotted abbreviations such as ``e.g.`` and a capital letter
    followed by one lowercase letter and a period, such as ``Mr.``.

    Args:
        text: The text to take the first sentence from.

    Returns:
        The text up to and including the first sentence terminator, or
        *text* unchanged when it contains no sentence boundary (this
        includes the empty string).
    """
    # Only the first boundary matters, so stop splitting there (maxsplit=1).
    # re.split always returns at least one element, so indexing [0] is safe
    # even for an empty string.
    return re.split(
        r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=[.?!])\s', text, maxsplit=1
    )[0]

def summarize_pdf_abstract(pdf_file):
    """Summarize a PDF's abstract in one sentence and synthesize it as speech.

    The first page whose extracted text contains "Abstract" or
    "Introduction" is fed, as a whole page, to the module-level
    summarization model. Only the first sentence of the generated summary
    is kept, and that sentence is converted to audio with gTTS.

    Args:
        pdf_file: The uploaded PDF, in whatever form the Gradio ``File``
            input hands over; it is passed straight to ``PdfReader``.

    Returns:
        A ``(summary_sentence, audio_bytes)`` tuple: the one-sentence
        summary as a string and the raw bytes gTTS wrote for it.

    Raises:
        gr.Error: If any step fails (unreadable PDF, no matching page,
            model or text-to-speech failure). ``gr.Error`` is an
            ``Exception`` subclass whose message Gradio shows in the UI;
            the original exception is chained as ``__cause__``.
    """
    try:
        reader = PdfReader(pdf_file)
        abstract_text = ""
        for page in reader.pages:
            # Text extraction is the expensive step, so do it once per page.
            # `or ""` guards against an extractor that yields None.
            page_text = page.extract_text() or ""
            if "Abstract" in page_text or "Introduction" in page_text:
                abstract_text = page_text
                break

        if not abstract_text:
            # Summarizing an empty string would only produce noise; fail
            # with a message the user can act on instead.
            raise ValueError(
                "No page containing 'Abstract' or 'Introduction' was found "
                "in the PDF."
            )

        # truncation=True keeps an unusually long page within the
        # tokenizer's configured maximum input length.
        inputs = tokenizer(abstract_text, return_tensors="pt", truncation=True)
        outputs = model.generate(**inputs)
        # Drop special tokens (e.g. <s>, </s>) so they are neither displayed
        # nor read aloud by the speech synthesizer.
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Extract only the first sentence
        summary_sentence = extract_first_sentence(summary)

        # Generate audio into an in-memory buffer (no temp file needed)
        speech = gTTS(text=summary_sentence, lang="en")
        speech_bytes = BytesIO()
        speech.write_to_fp(speech_bytes)

        # Return individual output values
        return summary_sentence, speech_bytes.getvalue()

    except Exception as e:
        # Top-level UI boundary: surface the message in the Gradio front end
        # while keeping the original exception chained for debugging.
        raise gr.Error(str(e)) from e

# Wire the summarizer into a Gradio UI: one PDF upload as input, a text box
# and an audio player as outputs.
interface = gr.Interface(
    fn=summarize_pdf_abstract,
    inputs=[gr.File(label="Upload PDF")],
    # Output components are listed in the same order as the
    # (summary, audio) tuple returned by `fn`.
    outputs=[gr.Textbox(label="Summary"), gr.Audio()],
    title="PDF Summarization & Audio Tool",
    description="""PDF Summarization App. This app summarizes the abstract of a PDF in one sentence and generates an audio of it. Only upload PDF's with Abstracts 
    Please read the README.MD for information about the app and sample PDFs.""",
    # The sample PDF is resolved relative to this script's directory rather
    # than the current working directory.
    examples=[os.path.join(os.path.dirname(__file__), "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")],
)

# Start the app as soon as the module is executed. share=True is Gradio's
# documented option for requesting a public share link.
interface.launch(share=True)