"""Gradio demo: extractive summarization of text with Legal-BERT.

Loads the ``nlpaueb/legal-bert-base-uncased`` checkpoint once at import time,
wraps it in a ``summarizer.Summarizer`` and exposes a text-in / text-out
Gradio interface around :func:`get_response`.
"""

import os
import re

import gradio as gr
import nltk
import numpy as np
import pdf2image
import pdfkit
import pytesseract as pt
import yake
from fpdf import FPDF
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize
from summarizer import Summarizer, TransformerSummarizer
from transformers import AutoModelForPreTraining, AutoTokenizer
from transformers import pipelines

# Uncomment to download the NLTK 'punkt' tokenizer data:
# nltk.download('punkt')

model_name = 'nlpaueb/legal-bert-base-uncased'

tokenizer = AutoTokenizer.from_pretrained(model_name)
# The summarizer reads the model's hidden states; a custom model only returns
# them when `output_hidden_states` is enabled in its config, so request it
# explicitly here (Summarizer only does this itself for its built-in models).
model = AutoModelForPreTraining.from_pretrained(
    model_name,
    output_hidden_states=True,
)
bert_legal_model = Summarizer(custom_model=model, custom_tokenizer=tokenizer)


def get_response(input_text: str) -> str:
    """Return an extractive summary of *input_text*.

    The summary is produced by ``bert_legal_model`` with ``min_length=8`` and
    ``ratio=0.05``.

    Args:
        input_text: The text to summarize.

    Returns:
        The summary produced by the summarizer, or an empty string when
        *input_text* is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the model call for blank input.
    if not input_text or not input_text.strip():
        return ""

    output_text = bert_legal_model(input_text, min_length=8, ratio=0.05)
    return output_text


iface = gr.Interface(
    get_response,
    "text",
    "text",
)

if __name__ == "__main__":
    iface.launch(share=False)