vividsd commited on
Commit
3075f85
1 Parent(s): 25020a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # imports
2
+
3
+ import gradio as gr
4
+ from transformers import pipeline
5
+ import torch
6
+ import PyPDF2
7
+
8
+ # Function to read the uploaded PDF and extract its abstract, when present, by searching for the keyword "abstract:" (case-insensitive).
9
+ # If the PDF doesn't contain the text "abstract:", it won't work ("Abstract not found." is returned).
10
+ # Also, I'm trying to limit the result to the abstract itself, not other sections, by cutting it off at the first blank line that follows it.
11
+
12
def process_pdf(pdf):
    """Return the abstract section found in an uploaded PDF.

    The text of every page is concatenated, then searched
    case-insensitively for the marker ``abstract:``. The returned string
    starts at that marker and runs up to (not including) the first blank
    line (``"\\n\\n"``) after it, or to the end of the text when there is
    no blank line.

    Args:
        pdf: The uploaded file. Either an object exposing the file path
            as ``.name`` or a plain path string. ``None`` means nothing
            was uploaded.

    Returns:
        The abstract text including the leading marker, or the string
        ``"Abstract not found."`` when no file was given or the marker
        does not occur in the extracted text.
    """
    # Local import keeps this block self-contained; the file's import
    # header does not bring in `re`.
    import re

    not_found = "Abstract not found."

    # Submitting the form without a file passes None; answer gracefully
    # instead of failing on `pdf.name`.
    if pdf is None:
        return not_found

    # Accept both a file-like wrapper (with `.name`) and a bare path.
    path = getattr(pdf, "name", pdf)

    with open(path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # Extraction must happen while the file is still open. `or ""`
        # guards against a page that yields no text object at all.
        text = "".join(page.extract_text() or "" for page in reader.pages)

    # Search the original text rather than `text.lower()`: lowercasing can
    # change the length of some characters, which would shift the offsets
    # used for slicing. re.ASCII restricts case folding to ASCII letters.
    marker = re.search(r"abstract:", text, re.IGNORECASE | re.ASCII)
    if marker is None:
        return not_found

    start = marker.start()
    end = text.find("\n\n", start)
    if end == -1:
        return text[start:]
    return text[start:end]
29
+
30
+ #Now creating the interface to read the PDFs
31
+
32
# The legacy `gr.inputs` namespace was deprecated in Gradio 3.x and is gone
# in 4.x, so prefer the top-level File component. Fall back to the legacy
# call only on releases that do not provide `gr.File`.
if hasattr(gr, "File"):
    pdf_input = gr.File(label="Upload PDF")
else:
    pdf_input = gr.inputs.File(type="file", label="Upload PDF")

# Single-function UI: one uploaded PDF in, the extracted text out.
interface = gr.Interface(
    fn=process_pdf,
    inputs=pdf_input,
    outputs="text",
    title="Summarizing outloud",
    description="Extract abstracts from PDFs, summarize then in 1 sentence and get an audio of it",
    # NOTE(review): these example files are assumed to sit next to app.py;
    # confirm they are committed alongside it.
    examples=[["example_pdf1.pdf"], ["example_pdf2.pdf"]],
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()