File size: 972 Bytes
704093d
 
 
 
79776cb
 
 
 
 
704093d
 
 
c3e00da
704093d
 
 
 
 
 
 
 
 
c3e00da
704093d
beb1c01
79776cb
 
 
 
 
 
704093d
 
 
 
79776cb
704093d
79776cb
704093d
79776cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import gradio as gr
import numpy as np
from pypdf import PdfReader
import os
from transformers import pipeline

# Hugging Face model id; per its name, a CamemBERT encoder-decoder checkpoint
# fine-tuned for French summarization.
model_path = "mrm8488/camembert2camembert_shared-finetuned-french-summarization"

# Summarization pipeline, created once at import time and reused by summarise().
pipe = pipeline(task='summarization', model=model_path)

# Minimum-length value handed to the pipeline call in summarise().
min_length = 500

def extract_text(file) -> str:
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The uploaded PDF, passed unchanged to ``PdfReader``. In this
            file it is the value delivered by the Gradio ``File`` upload
            event. ``None`` is treated as "no file".

    Returns:
        The text of all pages joined by single spaces, or an empty string
        when no file was provided.
    """
    # Nothing to read: return an empty result instead of failing in PdfReader.
    if file is None:
        return ""

    reader = PdfReader(file)

    # Iterate the pages directly (no index array needed). ``or ""`` is
    # defensive: a page that yields no text must not break the join.
    return ' '.join(page.extract_text() or "" for page in reader.pages)
    
def summarise(text):
    """Summarise *text* with the module-level summarization pipeline.

    Args:
        text: The text to summarise.

    Returns:
        The ``summary_text`` of the first prediction, or an empty string when
        *text* is empty or contains only whitespace.
    """
    # The change event can deliver empty text (e.g. when the box is cleared);
    # there is nothing to summarise, so skip the model call entirely.
    if not text or not text.strip():
        return ""

    # truncation=True clips input that exceeds the model's maximum input
    # length instead of passing an over-long sequence to the model, which
    # matters for text extracted from whole PDFs.
    # NOTE(review): min_length is passed positionally; the transformers
    # Pipeline call appears to log extra positional args as ignored rather
    # than apply them. If the constraint is intended, pass
    # min_length=min_length together with a compatible max_length - confirm.
    pred = pipe(text, min_length, truncation=True)

    return pred[0]["summary_text"]

# Gradio UI: upload a PDF, show its extracted text, then show a summary of it.
with gr.Blocks() as demo:
    file_input = gr.File(label="Upload a PDF file")
    text_output = gr.Textbox(label="Extracted Text")
    summary_output = gr.Textbox(label="Summary")

    # Uploading a file fills the "Extracted Text" box ...
    file_input.upload(fn=extract_text, inputs=file_input, outputs=text_output)
    # ... and every change to that box refreshes the "Summary" box.
    text_output.change(fn=summarise, inputs=text_output, outputs=summary_output)

demo.launch()