pip install gradio PyMuPDF import gradio as gr from transformers import T5Tokenizer, MT5ForConditionalGeneration import fitz # PyMuPDF # Load the fine-tuned tokenizer and model model_name = "fine-tuned-mt5" new_tokenizer = T5Tokenizer.from_pretrained(model_name, clean_up_tokenization_spaces=True) new_model = MT5ForConditionalGeneration.from_pretrained(model_name) # Function to extract text from PDF using PyMuPDF def extract_text_from_pdf(pdf_file): text = "" # Open the PDF file with fitz.open(pdf_file) as doc: for page in doc: text += page.get_text() # Extract text from each page return text # Summarization function def summarize_pdf(pdf_file, max_summary_length): # Extract text from the PDF input_text = extract_text_from_pdf(pdf_file) # Tokenize the input to check length tokenized_input = new_tokenizer.encode(input_text, return_tensors='pt') try: # Generate the summary summary_ids = new_model.generate( tokenized_input, max_length=max_summary_length, min_length=30, num_beams=15, repetition_penalty=5.0, no_repeat_ngram_size=2 ) # Decode the generated summary summary = new_tokenizer.decode(summary_ids[0], skip_special_tokens=True) # Clean up the summary to remove unwanted tokens cleaned_summary = ' '.join([token for token in summary.split() if not token.startswith('