raannakasturi committed on
Commit
ba5f64c
·
verified ·
1 Parent(s): defd5de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -71
app.py CHANGED
@@ -1,71 +1,68 @@
1
- from sumy.parsers.plaintext import PlaintextParser
2
- from sumy.nlp.tokenizers import Tokenizer
3
- from sumy.summarizers.text_rank import TextRankSummarizer
4
- from sumy.summarizers.luhn import LuhnSummarizer
5
- from sumy.summarizers.lex_rank import LexRankSummarizer
6
- from sumy.summarizers.lsa import LsaSummarizer
7
- from sumy.nlp.stemmers import Stemmer
8
- from sumy.utils import get_stop_words
9
- import gradio as gr
10
- import nltk
11
- import time
12
-
13
- def textrank_summarizer(text_corpus):
14
- start_time = time.time()
15
- parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
16
- stemmer = Stemmer("english")
17
- summarizer = TextRankSummarizer(stemmer)
18
- summarizer.stop_words = get_stop_words("english")
19
- sentences = summarizer(parser.document, 15)
20
- summary = ""
21
- for sentence in sentences:
22
- summary += str(sentence) + ""
23
- end_time = time.time()
24
- print(f"Time taken: {end_time - start_time:.2f} seconds")
25
- return summary
26
-
27
- def clear_everything(text_corpus, summary):
28
- return None, None
29
-
30
- theme = gr.themes.Soft(
31
- primary_hue="purple",
32
- secondary_hue="cyan",
33
- neutral_hue="slate",
34
- font=[
35
- gr.themes.GoogleFont('Syne'),
36
- gr.themes.GoogleFont('Poppins'),
37
- gr.themes.GoogleFont('Poppins'),
38
- gr.themes.GoogleFont('Poppins')
39
- ],
40
- )
41
-
42
- with gr.Blocks(theme=theme, title="TextRank Summarizer", fill_height=True) as app:
43
- gr.HTML(
44
- value ='''
45
- <h1 style="text-align: center;">TextRank Summarizer</h1>
46
- <p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU.</p>
47
- <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
48
- ''')
49
- with gr.Row():
50
- with gr.Column():
51
- text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
52
- with gr.Row():
53
- clear_btn = gr.Button(value="Clear", variant='stop')
54
- summarize_btn = gr.Button(value="Summarize", variant='primary')
55
- summary = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
56
-
57
- summarize_btn.click(
58
- textrank_summarizer,
59
- inputs=[text_corpus],
60
- outputs=[summary],
61
- concurrency_limit=25,
62
- scroll_to_output=True,
63
- show_api=True,
64
- api_name="textrank_summarizer",
65
- show_progress="full",
66
- )
67
- clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)
68
-
69
- nltk.download('punkt', quiet=True)
70
- nltk.download('punkt_tab', quiet=True)
71
- app.queue(default_concurrency_limit=25).launch(show_api=True, max_threads=500, ssr_mode=False)
 
1
+ from sumy.parsers.plaintext import PlaintextParser
2
+ from sumy.nlp.tokenizers import Tokenizer
3
+ from sumy.summarizers.text_rank import TextRankSummarizer
4
+ from sumy.nlp.stemmers import Stemmer
5
+ from sumy.utils import get_stop_words
6
+ import gradio as gr
7
+ import nltk
8
+ import time
9
+
10
def textrank_summarizer(text_corpus, sentence_count=10):
    """Summarize *text_corpus* with sumy's TextRank summarizer.

    Args:
        text_corpus: Plain English text to summarize.
        sentence_count: Number of sentences requested from the summarizer.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        The selected sentences concatenated into a single string, or ``""``
        when *text_corpus* is ``None`` or contains only whitespace.
    """
    # Nothing to summarize: return early instead of handing None or blank
    # input to the tokenizer/parser.
    if text_corpus is None or not str(text_corpus).strip():
        return ""

    start_time = time.time()
    language = "english"
    parser = PlaintextParser.from_string(text_corpus, Tokenizer(language))
    summarizer = TextRankSummarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    sentences = summarizer(parser.document, sentence_count)

    # NOTE(review): the previous loop appended "" after every sentence, so
    # sentences run together with no separator. That output is kept
    # unchanged here; confirm whether a " " separator was intended.
    summary = "".join(str(sentence) for sentence in sentences)

    elapsed = time.time() - start_time
    print(f"Time taken: {elapsed:.2f} seconds")
    return summary
23
+
24
def clear_everything(text_corpus, summary):
    """Return the reset values for the two text areas.

    Both arguments are accepted but ignored; the result does not depend on
    the current contents.

    Returns:
        A ``(None, None)`` tuple.
    """
    cleared_corpus = None
    cleared_summary = None
    return cleared_corpus, cleared_summary
26
+
27
# Soft theme with purple/cyan/slate hues. The font list keeps the same four
# entries as before: Syne first, then Poppins three times.
theme = gr.themes.Soft(
    font=[
        gr.themes.GoogleFont(family)
        for family in ("Syne", "Poppins", "Poppins", "Poppins")
    ],
    neutral_hue="slate",
    secondary_hue="cyan",
    primary_hue="purple",
)
38
+
39
# Build the UI: a header, an input text area with Clear/Summarize buttons,
# and a read-only output text area, then wire the two button handlers.
# NOTE(review): the row/column nesting below is reconstructed; confirm it
# matches the intended layout.
with gr.Blocks(title="TextRank Summarizer", theme=theme, fill_height=True) as app:
    gr.HTML(
        value=(
            '\n'
            '<h1 style="text-align: center;">TextRank Summarizer</h1>\n'
            '<p style="text-align: center;">This app uses a TextRank approach to summarize PDF documents based on CPU.</p>\n'
            '<p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>\n'
        )
    )
    with gr.Row():
        with gr.Column():
            # Input side: the text to summarize plus the action buttons.
            text_corpus = gr.TextArea(
                label="Text Corpus",
                placeholder="Paste the text corpus here",
                lines=5,
            )
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant="stop")
                summarize_btn = gr.Button(value="Summarize", variant="primary")
        # Output side: read-only, with a copy button.
        summary = gr.TextArea(
            label="Raw Data",
            placeholder="The generated raw data will be displayed here",
            lines=7,
            interactive=False,
            show_copy_button=True,
        )

    # Summarize: text_corpus -> textrank_summarizer -> summary, exposed in
    # the API under the name "textrank_summarizer".
    summarize_btn.click(
        fn=textrank_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        api_name="textrank_summarizer",
        show_api=True,
        show_progress="full",
        scroll_to_output=True,
        concurrency_limit=25,
    )
    # Clear: resets both text areas; hidden from the API listing.
    clear_btn.click(
        fn=clear_everything,
        inputs=[text_corpus, summary],
        outputs=[text_corpus, summary],
        show_api=False,
    )
65
+
66
# Fetch the NLTK resources quietly before serving.
# NOTE(review): presumably these back the sumy Tokenizer("english") — confirm.
for resource in ("punkt", "punkt_tab"):
    nltk.download(resource, quiet=True)

# Enable the request queue, then start the server.
app.queue(default_concurrency_limit=25).launch(
    ssr_mode=False,
    max_threads=500,
    show_api=True,
)