File size: 908 Bytes
eb2bab3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74d23f2
eb2bab3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import PyPDF2
from transformers import pipeline
import gradio as gr

# Build the shared summarization pipeline once at import time so every
# request handled by the app reuses the same loaded T5 model and tokenizer.
summarizer = pipeline(
    "summarization",
    model="t5-base",
    tokenizer="t5-base",
    framework="pt",
)

def load_pdf(file, max_length, min_length, do_sample):
    """Extract the text of an uploaded PDF and return a summary of it.

    Args:
        file: The upload as supplied by the ``gr.File`` input; it is handed
            unchanged to ``PyPDF2.PdfReader``. ``None`` when nothing has
            been uploaded.
        max_length: Upper bound forwarded to the summarizer as
            ``max_length``.
        min_length: Lower bound forwarded to the summarizer as
            ``min_length``.
        do_sample: Forwarded to the summarizer as ``do_sample``.

    Returns:
        The ``summary_text`` of the first result produced by the
        summarizer, or a short explanatory message when no file was
        uploaded or the PDF contains no extractable text.
    """
    # Submitting the form without an upload passes None; fail softly with a
    # message instead of crashing inside the PDF reader.
    if file is None:
        return "Please upload a PDF file."

    pdf = PyPDF2.PdfReader(file)

    # Join pages with a newline so the last word of one page is not glued to
    # the first word of the next. `or ""` keeps the join safe should a page
    # yield no text at all (e.g. a scanned, image-only page).
    text = "\n".join(page.extract_text() or "" for page in pdf.pages)

    # Nothing to summarise: avoid feeding an empty prompt to the model.
    if not text.strip():
        return "No extractable text was found in this PDF."

    # Slider widgets may deliver floats; the length bounds are token counts,
    # so coerce them to integers before handing them to the model.
    summary = summarizer(
        text,
        max_length=int(max_length),
        min_length=int(min_length),
        do_sample=do_sample,
    )
    return summary[0]['summary_text']

# Input widgets, in the same order as load_pdf's parameters:
# the PDF upload, the two summary-length bounds, and the sampling toggle.
inputs = [
    gr.File(label="Upload PDF"),
    gr.Slider(minimum=500, maximum=1500, value=1000, label="Max Length"),
    gr.Slider(minimum=30, maximum=500, value=300, label="Min Length"),
    gr.Checkbox(value=False, label="Do Sample"),
]

# Wire the widgets to load_pdf and show its return value as plain text.
iface = gr.Interface(
    fn=load_pdf,
    inputs=inputs,
    outputs="text",
    title="PDF auto-summarise",
)

# Start the web app.
iface.launch()