varl42 committed on
Commit
ed6000d
·
1 Parent(s): 186ead1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -0
app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import PyPDF2
4
+ from transformers import pipeline
5
+ import numpy
6
+ import scipy
7
+ from gtts import gTTS
8
+ from io import BytesIO
9
+ from transformers import BartTokenizer
10
+
11
def extract_text(pdf_file):
    """Return the text extracted from the first page of a PDF.

    Only page index 0 is read; later pages are ignored.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (presumably the upload handed over by the Gradio file input --
            confirm against the caller).

    Returns:
        The result of ``extract_text()`` on the first page.
    """
    first_page = PyPDF2.PdfReader(pdf_file).pages[0]
    return first_page.extract_text()
15
+
16
+
17
# Lazily-built summarization pipeline, reused by every call to summarize_text.
_SUMMARIZER = None


def _get_summarizer():
    """Build the BART summarization pipeline on first use and reuse it afterwards."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
        _SUMMARIZER = pipeline(
            "summarization",
            model="facebook/bart-large-cnn",
            tokenizer=tokenizer,
        )
    return _SUMMARIZER


def _select_abstract(text):
    """Return the four sentences that follow the first one mentioning "Abstract"."""
    sentences = text.split(". ")
    # Start right after the first sentence containing "Abstract"; when no
    # sentence mentions it, fall back to the beginning of the text instead of
    # failing on an unassigned index.
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if "Abstract" in sentence),
        0,
    )
    return ". ".join(sentences[start:start + 4])


def summarize_text(text):
    """Summarize the abstract found in ``text`` with facebook/bart-large-cnn.

    The text is split on ``". "``; the four sentences after the first sentence
    containing the word "Abstract" are summarized. If no sentence contains
    "Abstract", the first four sentences of the text are used instead.

    Args:
        text: Plain text of the document (e.g. the first page of a paper).

    Returns:
        The generated summary string (``max_length`` and ``min_length`` are
        both 30, with sampling disabled).

    Raises:
        ValueError: If ``text`` is empty or no text could be selected for
            summarization.
    """
    if not text:
        raise ValueError("No text to summarize.")

    abstract = _select_abstract(text)
    if not abstract.strip():
        raise ValueError("No text to summarize.")

    summarizer = _get_summarizer()
    # truncation=True clips input that exceeds the model's maximum length
    # rather than letting the model fail on it.
    summary = summarizer(
        abstract,
        max_length=30,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
31
+
32
def text_to_audio(text):
    """Synthesize English speech for ``text`` and return it as raw bytes.

    Args:
        text: The text to speak.

    Returns:
        The complete contents gTTS wrote into an in-memory buffer, as
        ``bytes``.
    """
    audio_stream = BytesIO()
    gTTS(text, lang='en').write_to_fp(audio_stream)
    # getvalue() yields the whole buffer regardless of the current position.
    return audio_stream.getvalue()
38
+
39
def audio_pdf(pdf_file):
    """Turn a PDF into a short text summary plus spoken audio of that summary.

    Chains ``extract_text`` -> ``summarize_text`` -> ``text_to_audio``.

    Args:
        pdf_file: The PDF to process, passed straight to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the summary string and the value
        returned by ``text_to_audio`` for it.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)
44
+
45
# Gradio components: one file input, and a text box plus an audio player
# for the two values returned by audio_pdf.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Wire the components to audio_pdf; the example entries are file names
# expected to sit alongside this script.
iface = gr.Interface(
    title="PDF Audio Summarizer 📻",
    description="App that converts an abstract into audio",
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    examples=[
        "Attention_is_all_you_need.pdf",
        "ImageNet_Classification.pdf",
    ],
)

iface.launch()