|
from sumy.parsers.plaintext import PlaintextParser
|
|
from sumy.nlp.tokenizers import Tokenizer
|
|
from sumy.summarizers.text_rank import TextRankSummarizer
|
|
from sumy.summarizers.luhn import LuhnSummarizer
|
|
from sumy.summarizers.lex_rank import LexRankSummarizer
|
|
from sumy.summarizers.lsa import LsaSummarizer
|
|
from sumy.nlp.stemmers import Stemmer
|
|
from sumy.utils import get_stop_words
|
|
import gradio as gr
|
|
import nltk
|
|
import time
|
|
|
|
def textrank_summarizer(text_corpus, sentence_count=15):
    """Summarize English text with sumy's TextRank summarizer.

    Args:
        text_corpus: Plain text to summarize. ``None``, an empty string, or
            whitespace-only text produces an empty summary without running
            the summarizer.
        sentence_count: Sentence count passed to the summarizer. Defaults to
            15, the value that was previously hard-coded.

    Returns:
        The sentences returned by the summarizer, joined by single spaces,
        or ``""`` when there is nothing to summarize.
    """
    # Guard first: there is nothing to tokenize, and a None input would
    # otherwise fail inside the parser.
    if not text_corpus or not text_corpus.strip():
        return ""

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()

    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    summarizer = TextRankSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")

    sentences = summarizer(parser.document, sentence_count)

    # Separate sentences with a space; concatenating them directly glued the
    # end of one sentence to the start of the next.
    summary = " ".join(str(sentence) for sentence in sentences)

    elapsed = time.perf_counter() - start_time
    print(f"Time taken: {elapsed:.2f} seconds")

    return summary
|
|
|
|
def clear_everything(text_corpus, summary):
    """Return an empty value for each of the two text areas.

    Both arguments are accepted only because the click handler is wired with
    two inputs; their contents are ignored.

    Returns:
        A ``(None, None)`` tuple, one entry per output component.
    """
    cleared = (None, None)
    return cleared
|
|
|
|
# Visual theme for the app: Soft base with custom hues and fonts.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    # Fonts are listed in fallback order. The previous list repeated Poppins
    # three times, which added nothing; a generic family is listed last so
    # text stays sans-serif if the Google fonts cannot be loaded.
    font=[
        gr.themes.GoogleFont("Syne"),
        gr.themes.GoogleFont("Poppins"),
        "sans-serif",
    ],
)
|
|
|
|
# Build the UI: header, input column with action buttons, and output area.
# NOTE(review): the nesting below is reconstructed from statement order;
# confirm that the output text area belongs beside the input column.
with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
    # Static page header.
    gr.HTML(
        value='''
        <h1 style="text-align: center;">TextRank Summarizer</h1>
        <p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU as well as GPU.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
        '''
    )

    with gr.Row():
        with gr.Column():
            # Input text plus the two action buttons underneath it.
            text_corpus = gr.TextArea(
                label="Text Corpus",
                placeholder="Paste the text corpus here",
                lines=5,
            )
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant="stop")
                summarize_btn = gr.Button(value="Summarize", variant="primary")

        # Read-only output area with a copy button.
        summary = gr.TextArea(
            label="Raw Data",
            placeholder="The generated raw data will be displayed here",
            lines=7,
            interactive=False,
            show_copy_button=True,
        )

    # "Summarize" runs the summarizer on the input and fills the output;
    # the endpoint is also exposed in the API under its own name.
    summarize_btn.click(
        fn=textrank_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        api_name="textrank_summarizer",
        show_api=True,
        show_progress="full",
        scroll_to_output=True,
        concurrency_limit=25,
    )

    # "Clear" resets both text areas; this handler is hidden from the API docs.
    clear_btn.click(
        fn=clear_everything,
        inputs=[text_corpus, summary],
        outputs=[text_corpus, summary],
        show_api=False,
    )
|
|
|
|
# Fetch the NLTK resources before serving requests
# (presumably needed by sumy's Tokenizer; verify against the sumy version in use).
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource, quiet=True)

# Enable the request queue with the default per-event concurrency limit,
# then start the server with the API page visible.
app.queue(default_concurrency_limit=25)
app.launch(show_api=True)
|
|
|