raannakasturi committed on
Commit
b1c7321
·
1 Parent(s): 5f8be2c

Add TextRank summarizer app with Gradio interface and dependencies

Browse files
Files changed (2) hide show
  1. app.py +71 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sumy.parsers.plaintext import PlaintextParser
2
+ from sumy.nlp.tokenizers import Tokenizer
3
+ from sumy.summarizers.text_rank import TextRankSummarizer
4
+ from sumy.summarizers.luhn import LuhnSummarizer
5
+ from sumy.summarizers.lex_rank import LexRankSummarizer
6
+ from sumy.summarizers.lsa import LsaSummarizer
7
+ from sumy.nlp.stemmers import Stemmer
8
+ from sumy.utils import get_stop_words
9
+ import gradio as gr
10
+ import nltk
11
+ import time
12
+
13
def textrank_summarizer(text_corpus, sentence_count=15):
    """Produce an extractive summary of English text using sumy's TextRank.

    Args:
        text_corpus: Plain text to summarize. ``None``, an empty string, or
            whitespace-only text yields an empty summary.
        sentence_count: Number of sentences requested from the summarizer.
            Defaults to 15, the value that used to be hard-coded.

    Returns:
        The selected sentences joined by single spaces, or ``""`` when there
        is nothing to summarize.
    """
    # Nothing to rank: return before building the sumy pipeline so that a
    # None value (e.g. a reset textbox) does not reach the parser.
    if not text_corpus or not text_corpus.strip():
        return ""

    # perf_counter is monotonic, so the elapsed time cannot go negative if
    # the wall clock is adjusted mid-run.
    start_time = time.perf_counter()

    language = "english"
    parser = PlaintextParser.from_string(text_corpus, Tokenizer(language))
    summarizer = TextRankSummarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    sentences = summarizer(parser.document, sentence_count)

    # Separate sentences with a space; appending them back to back would run
    # the end of one sentence straight into the start of the next.
    summary = " ".join(str(sentence) for sentence in sentences)

    elapsed = time.perf_counter() - start_time
    print(f"Time taken: {elapsed:.2f} seconds")
    return summary
26
+
27
def clear_everything(text_corpus, summary):
    """Return ``(None, None)`` to reset two output components.

    Both arguments are accepted so the function can be wired with the same
    components as inputs and outputs; their values are ignored.
    """
    cleared = (None, None)
    return cleared
29
+
30
# Soft theme with purple/cyan/slate hues. The font list is built from the
# same four Google font names, in the same order, as before.
theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont(font_name)
        for font_name in ('Syne', 'Poppins', 'Poppins', 'Poppins')
    ],
)
41
+
42
# Page layout and event wiring for the summarizer UI.
# NOTE(review): the nesting below was reconstructed from a flattened view of
# the file (indentation was not available) — confirm against the repository.
with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
    # Header text describes what the app actually does: it summarizes pasted
    # text with TextRank (there is no PDF upload or GPU path in this file).
    gr.HTML(
        value='''
        <h1 style="text-align: center;">TextRank Summarizer</h1>
        <p style="text-align: center;">This app uses the TextRank algorithm to produce an extractive summary of the text you paste below.</p>
        <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
        ''')
    with gr.Row():
        with gr.Column():
            text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
        # Read-only output box; labelled for the summary it displays.
        summary = gr.TextArea(
            label="Summary",
            placeholder="The generated summary will be displayed here",
            lines=7,
            interactive=False,
            show_copy_button=True,
        )

    # Summarize: run TextRank on the pasted text and show the result.
    summarize_btn.click(
        textrank_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="textrank_summarizer",
        show_progress="full",
    )
    # Clear: reset both text areas; not exposed through the API.
    clear_btn.click(
        clear_everything,
        inputs=[text_corpus, summary],
        outputs=[text_corpus, summary],
        show_api=False,
    )
68
+
69
# Download the NLTK 'punkt' and 'punkt_tab' resources quietly before serving
# (presumably required by the sentence tokenizer — confirm).
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource, quiet=True)

# Enable queuing with a default concurrency limit of 25, then start the app
# with its API page visible.
queued_app = app.queue(default_concurrency_limit=25)
queued_app.launch(show_api=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.8.0
2
+ sumy==0.11.0
3
+ nltk==3.9.1