"""Gradio front end for extractive summarization with sumy's TextRank."""

import time
from typing import Optional

import gradio as gr
import nltk
from sumy.nlp.stemmers import Stemmer
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.utils import get_stop_words

# Language used for tokenizing, stemming and stop-word filtering.
LANGUAGE = "english"
# Upper bound on the number of sentences requested from the summarizer.
SUMMARY_SENTENCE_COUNT = 25


def textrank_summarizer(text_corpus: Optional[str]) -> str:
    """Return a TextRank extractive summary of *text_corpus*.

    The text is parsed with sumy's plaintext parser, ranked by
    ``TextRankSummarizer`` (English stemmer and stop words), and up to
    ``SUMMARY_SENTENCE_COUNT`` sentences are requested. The selected
    sentences are joined with a single space.

    Args:
        text_corpus: Raw text to summarize. ``None``, empty, or
            whitespace-only input yields an empty summary.

    Returns:
        The summary as one string; ``""`` when there is nothing to summarize.
    """
    # The "Clear" handler resets the input to None, and an empty textbox
    # sends "", so bail out before handing either to the parser.
    if not text_corpus or not text_corpus.strip():
        return ""

    # perf_counter is monotonic, which suits measuring elapsed durations.
    started = time.perf_counter()

    parser = PlaintextParser.from_string(text_corpus, Tokenizer(LANGUAGE))
    summarizer = TextRankSummarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)
    sentences = summarizer(parser.document, SUMMARY_SENTENCE_COUNT)

    # Separate sentences with a space; concatenating them with an empty
    # separator glued the end of one sentence to the start of the next.
    summary = " ".join(str(sentence) for sentence in sentences)

    print(f"Time taken: {time.perf_counter() - started:.2f} seconds")
    return summary


def clear_everything(text_corpus, summary) -> tuple[None, None]:
    """Reset both text areas.

    The arguments are the current component values supplied by Gradio and
    are intentionally ignored; ``None`` is returned for each output so both
    components are cleared.
    """
    return None, None


theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont('Syne'),
        gr.themes.GoogleFont('Poppins'),
        gr.themes.GoogleFont('Poppins'),
        gr.themes.GoogleFont('Poppins'),
    ],
)

# NOTE(review): the original file had lost its indentation, so the nesting of
# the layout containers below is reconstructed (input column and output box
# side by side in one row) -- confirm against the intended UI.
with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
    gr.HTML(
        value='''
This app uses a TextRank approach to summarize PDF documents based on CPU.
The summarization process can take some time depending on the size of the text corpus and the complexity of the content.
'''
    )
    with gr.Row():
        with gr.Column():
            text_corpus = gr.TextArea(
                label="Text Corpus",
                placeholder="Paste the text corpus here",
                lines=5,
            )
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
        summary = gr.TextArea(
            label="Raw Data",
            placeholder="The generated raw data will be displayed here",
            lines=7,
            interactive=False,
            show_copy_button=True,
        )

    # Event listeners must be registered inside the Blocks context.
    summarize_btn.click(
        textrank_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="textrank_summarizer",
        show_progress="full",
    )
    clear_btn.click(
        clear_everything,
        inputs=[text_corpus, summary],
        outputs=[text_corpus, summary],
        show_api=False,
    )

# Fetch the NLTK sentence-tokenizer data before serving requests; the sumy
# Tokenizer relies on it.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

app.queue(default_concurrency_limit=25).launch(show_api=True, max_threads=500, ssr_mode=False)