playground / app.py
xavierbarbier's picture
Update app.py
79776cb verified
raw
history blame
972 Bytes
import gradio as gr
import numpy as np
from pypdf import PdfReader
import os
from transformers import pipeline
# Checkpoint identifier handed to the summarization pipeline below.
model_path = "mrm8488/camembert2camembert_shared-finetuned-french-summarization"

# Built once at module import and reused by summarise() for every call.
pipe = pipeline(task='summarization', model=model_path)

# Length floor that summarise() supplies when invoking the pipeline.
min_length = 500
def extract_text(file):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        file: The source passed straight to ``PdfReader`` (here, the value
            Gradio supplies to the upload callback). ``None`` is tolerated.

    Returns:
        The concatenated page text, or ``""`` when no file was supplied.
    """
    # Nothing to read: return an empty string rather than failing in PdfReader.
    if file is None:
        return ""

    reader = PdfReader(file)
    # Iterate the pages directly instead of indexing through a NumPy range;
    # ``or ""`` keeps the join safe should a page yield no text (None).
    return ' '.join(page.extract_text() or "" for page in reader.pages)
def summarise(text):
    """Summarise ``text`` with the module-level summarization pipeline.

    Args:
        text: The text to summarise (here, the contents of the
            "Extracted Text" box).

    Returns:
        The ``summary_text`` of the first prediction, or ``""`` when
        ``text`` is ``None``, empty, or only whitespace.
    """
    # The change event also fires when the box is cleared; skip the model
    # entirely when there is nothing to summarise.
    if not text or not text.strip():
        return ""

    # ``min_length`` must be a keyword argument: extra positional arguments
    # are dropped by the pipeline's ``__call__`` (it only logs a warning), so
    # the positional form never applied the length floor.
    # ``truncation=True`` asks the tokenizer to cut over-long input down to
    # its configured maximum instead of feeding the model an oversized input.
    # NOTE(review): confirm the model's generation max_length is at least
    # ``min_length``; otherwise the floor cannot be honoured.
    pred = pipe(text, min_length=min_length, truncation=True)
    return pred[0]["summary_text"]
with gr.Blocks() as demo:
    # Components, declared in display order: upload widget, extracted text,
    # then the generated summary.
    file_input = gr.File(label="Upload a PDF file")
    text_output = gr.Textbox(label="Extracted Text")
    summary_output = gr.Textbox(label="Summary")

    # Uploading a file fills the "Extracted Text" box ...
    file_input.upload(
        fn=extract_text,
        inputs=file_input,
        outputs=text_output,
    )
    # ... and any change to that box refreshes the summary.
    text_output.change(
        fn=summarise,
        inputs=text_output,
        outputs=summary_output,
    )

# Start serving the interface.
demo.launch()