Spaces:
Sleeping
Sleeping
Chris Finlayson
committed on
Commit
·
1c3b33f
1
Parent(s):
72d404b
Add pytorch
Browse files- application.py +31 -0
- requirements.txt +2 -1
application.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import PyPDF2
|
2 |
+
from transformers import pipeline
|
3 |
+
import gradio as gr
|
4 |
+
|
5 |
+
# Build the shared summarization pipeline once at import time so every
# request reuses the same loaded model and tokenizer.
_MODEL_NAME = "t5-base"
summarizer = pipeline(
    "summarization",
    model=_MODEL_NAME,
    tokenizer=_MODEL_NAME,
    framework="pt",
)
|
7 |
+
|
8 |
+
def load_pdf(file, max_length, min_length, do_sample):
    """Extract the text of an uploaded PDF and return a summary of it.

    Args:
        file: The uploaded PDF as delivered by the UI's file input; it is
            passed unchanged to ``PyPDF2.PdfReader``. ``None`` means nothing
            was uploaded.
        max_length: Upper bound on the generated summary length, forwarded
            to the summarization pipeline.
        min_length: Lower bound on the generated summary length, forwarded
            to the summarization pipeline.
        do_sample: Whether the pipeline should sample instead of decoding
            deterministically.

    Returns:
        The ``summary_text`` of the first pipeline result, or a short
        explanatory message when there is no file or no extractable text.
    """
    if file is None:
        return "Please upload a PDF file."

    reader = PyPDF2.PdfReader(file)
    # extract_text() may give back nothing for a page (e.g. scanned images);
    # treat that as an empty string. Pages are joined with a newline so the
    # last word of one page is not fused with the first word of the next.
    text = "\n".join(page.extract_text() or "" for page in reader.pages).strip()
    if not text:
        return "No extractable text was found in the PDF."

    # UI sliders can deliver floats, while generation lengths must be ints;
    # also keep min_length from exceeding max_length.
    max_length = int(max_length)
    min_length = min(int(min_length), max_length)

    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=bool(do_sample),
        # Ask the tokenizer to cut over-long documents down to the model's
        # maximum input length instead of feeding the full text.
        truncation=True,
    )
    return summary[0]["summary_text"]
|
19 |
+
|
20 |
+
|
21 |
+
# Input widgets, listed in the same order as load_pdf's parameters.
_pdf_upload = gr.File(label="Upload PDF")
_max_length_slider = gr.Slider(500, 1500, value=1000, label="Max Length")
_min_length_slider = gr.Slider(30, 500, value=300, label="Min Length")
_sample_toggle = gr.Checkbox(label="Do Sample", value=False)
inputs = [_pdf_upload, _max_length_slider, _min_length_slider, _sample_toggle]

# Wire the widgets to load_pdf and start the web UI.
iface = gr.Interface(
    fn=load_pdf,
    inputs=inputs,
    outputs="text",
    title="PDF summariser",
)
iface.launch()
|
30 |
+
|
31 |
+
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
pypdf2
|
2 |
-
transformers
|
|
|
|
1 |
pypdf2
|
2 |
+
transformers
|
3 |
+
torch
|