"""Gradio app: summarize the first page of a PDF and read the summary aloud."""

import functools
import os
import re
import tempfile

import gradio as gr
from gtts import gTTS
from PyPDF2 import PdfReader
from transformers import pipeline

# A sentence ends at '.', '!' or '?' followed by whitespace. Requiring the
# whitespace keeps decimals such as "95.3%" inside a single sentence.
_SENTENCE_END = re.compile(r"(?<=[.!?])\s+")


def extract_abstract_from_pdf(file):
    """Return the text of the first page of an uploaded PDF.

    Args:
        file: Either a path string or an object exposing the path as
            ``.name`` (both shapes are accepted so the function works
            regardless of how the upload widget hands the file over).

    Returns:
        The extracted text of the first page, or ``""`` when the PDF has
        no pages or the page yields no text.
    """
    path = getattr(file, "name", file)
    reader = PdfReader(path)
    if len(reader.pages) == 0:
        return ""
    # extract_text() can produce nothing for image-only pages; normalize
    # that to an empty string so callers only have one case to check.
    return reader.pages[0].extract_text() or ""


@functools.lru_cache(maxsize=2)
def _get_summarizer(model_name):
    """Build the summarization pipeline for *model_name* once and reuse it."""
    # Constructing a pipeline loads the model weights; caching avoids
    # repeating that work on every request.
    return pipeline("summarization", model=model_name)


def _first_sentence(text):
    """Return the first sentence of *text*, always ending in punctuation."""
    text = text.strip()
    if not text:
        return ""
    sentence = _SENTENCE_END.split(text, maxsplit=1)[0]
    if sentence[-1] not in ".!?":
        sentence += "."
    return sentence


def summarize_text(text, model_name='sshleifer/distilbart-cnn-12-6'):
    """Summarize *text* and return only the first sentence of the summary.

    Args:
        text: The text to summarize.
        model_name: Model identifier passed to the summarization pipeline.

    Returns:
        The first sentence of the generated summary, or ``""`` when *text*
        is blank or the model produces an empty summary.
    """
    if not text or not text.strip():
        return ""
    summarizer = _get_summarizer(model_name)
    # truncation=True clips inputs that exceed the model's maximum input
    # length instead of letting the call fail on a long first page.
    summary = summarizer(
        text,
        max_length=75,
        min_length=20,
        do_sample=False,
        truncation=True,
    )
    return _first_sentence(summary[0]['summary_text'])


def process_pdf_to_speech(file):
    """Summarize an uploaded PDF's first page and synthesize it as speech.

    Args:
        file: The uploaded PDF, as delivered by the ``gr.File`` input.

    Returns:
        Path to a temporary ``.mp3`` file containing the spoken summary.
        The file is not deleted by this function.

    Raises:
        ValueError: If no file was uploaded, or no text/summary could be
            obtained from the PDF.
    """
    if file is None:
        raise ValueError("No PDF file was uploaded.")

    abstract = extract_abstract_from_pdf(file)
    if not abstract.strip():
        raise ValueError("No text could be extracted from the first page.")

    summary = summarize_text(abstract)
    if not summary:
        raise ValueError("The summarizer produced no text.")

    tts = gTTS(text=summary, lang='en')

    # Reserve a unique path, then close the handle before gTTS writes to it
    # so the file is never opened twice at the same time.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as fp:
        audio_path = fp.name
    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)
        raise
    return audio_path


iface = gr.Interface(
    fn=process_pdf_to_speech,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Audio(label="Play Summary"),
    title="PDF Abstract to Speech",
    description="This app summarizes the abstract from a PDF and converts it to speech. Please upload a PDF with an abstract.",
)

# Launched at module level, as in the original script.
iface.launch()