import gradio as gr from PyPDF2 import PdfReader from transformers import AutoModelForSeq2SeqLM, AutoTokenizer from gtts import gTTS from io import BytesIO import re model_name = "ArtifactAI/led_large_16384_arxiv_summarization" model = AutoModelForSeq2SeqLM.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name) def extract_first_sentence(text): sentences = re.split(r'(?