Spaces: Runtime error
Commit · d33b093
Parent(s): edce3dc
add requirements.txt
- Summarizer.py +0 -56
- app.py +12 -15
Summarizer.py
DELETED
@@ -1,56 +0,0 @@
-import nltk
-
-from sumy.nlp.stemmers import Stemmer
-from sumy.summarizers.lsa import LsaSummarizer
-from sumy.utils import get_stop_words
-from transformers import Pipeline
-
-class Summarizer:
-    DEFAULT_LANGUAGE = "english"
-    DEFAULT_SENTENCE_LENGTH = 15
-
-    def __init__(self, pipeline: Pipeline):
-        self.pipeline = pipeline
-        stemmer = Stemmer(Summarizer.DEFAULT_LANGUAGE)
-        self.lsa_summarizer = LsaSummarizer(stemmer)
-        self.lsa_summarizer.stop_words = get_stop_words(language=Summarizer.DEFAULT_LANGUAGE)
-
-    @staticmethod
-    def sentence_list(summarized_sentences) -> list:
-        summarized_list = []
-        for sentence in summarized_sentences:
-            summarized_list.append(sentence._text)
-        return summarized_list
-
-    @staticmethod
-    def join_sentences(summarized_sentences: list) -> str:
-        return " ".join([sentence for sentence in summarized_sentences])
-
-    @staticmethod
-    def split_sentences_by_token_length(summary_sentences: list, split_token_length: int) -> list:
-        accumulated_list = []
-        result_list = []
-        cumulative_token_length = 0
-        for sentence in summary_sentences:
-            token_list = [token for token in nltk.word_tokenize(sentence) if token not in ["."]]
-            token_length = len(token_list)
-            if token_length + cumulative_token_length > split_token_length and result_list:
-                accumulated_list.append(Summarizer.join_sentences(result_list))
-                result_list = [sentence]
-                cumulative_token_length = token_length
-            else:
-                result_list.append(sentence)
-                cumulative_token_length += token_length
-
-        if result_list:
-            accumulated_list.append(Summarizer.join_sentences(result_list))
-
-        return accumulated_list
-
-    def abstractive_summary(self, summary_sentences: list) -> list:
-        wrapped_sentences = Summarizer.split_sentences_by_token_length(summary_sentences, split_token_length=600)
-        summary_list = []
-        for result in self.pipeline(wrapped_sentences, min_length=32, max_length=512):
-            summary_list.append(result['summary_text'])
-
-        return summary_list
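The deleted class's main piece of logic is split_sentences_by_token_length, which greedily packs whole sentences into chunks that stay under a token budget so each chunk fits the summarization model's input window. Below is a minimal, standalone sketch of that chunking idea, not the removed code itself: chunk_sentences and max_tokens are illustrative names, and plain whitespace splitting stands in for nltk.word_tokenize so the snippet runs without downloading the punkt model.

# Sketch of the greedy sentence-packing idea from the deleted
# Summarizer.split_sentences_by_token_length (illustrative names; whitespace
# tokenization instead of nltk.word_tokenize).
def chunk_sentences(sentences: list, max_tokens: int) -> list:
    chunks = []            # finished chunks, each joined into one string
    current = []           # sentences accumulated for the chunk being built
    current_tokens = 0
    for sentence in sentences:
        n_tokens = len(sentence.split())
        # Close the current chunk when adding this sentence would exceed the budget.
        if current and current_tokens + n_tokens > max_tokens:
            chunks.append(" ".join(current))
            current = [sentence]
            current_tokens = n_tokens
        else:
            current.append(sentence)
            current_tokens += n_tokens
    if current:
        chunks.append(" ".join(current))
    return chunks


if __name__ == "__main__":
    sentences = [
        "The first sentence is short.",
        "The second sentence is a little bit longer than the first one.",
        "A third sentence closes the example.",
    ]
    for chunk in chunk_sentences(sentences, max_tokens=20):
        print(chunk)

In the removed class, abstractive_summary used a 600-token budget per chunk and then ran each chunk through the transformers pipeline with min_length=32 and max_length=512.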
app.py
CHANGED
@@ -8,8 +8,6 @@ import validators
 from transformers import pipeline
 from validators import ValidationFailure
 
-from Summarizer import Summarizer
-
 
 def main() -> None:
     nltk.download("punkt")
@@ -58,12 +56,10 @@ def main() -> None:
         text = file.read()
         return text
 
-    summarizer: Summarizer = Summarizer(create_pipeline())
-
     if "target_text" not in st.session_state:
         st.session_state.target_text = ""
     if "sentence_lenght" not in st.session_state:
-        st.session_state.sentence_length =
+        st.session_state.sentence_length = 15
     if "sample_choice" not in st.session_state:
         st.session_state.sentence_length = ""
 
@@ -89,18 +85,19 @@ def main() -> None:
 
     summarize_button = st.button(label="Try it!")
 
-    @st.cache(suppress_st_warning=True,
-
-
-
-
-
-
+    # @st.cache(suppress_st_warning=True,
+    #           show_spinner=False,
+    #           allow_output_mutation=True,
+    #           hash_funcs={"torch.nn.parameter.Parameter": lambda _: None,
+    #                       "tokenizers.Tokenizer": lambda _: None,
+    #                       "tokenizers.AddedToken": lambda _: None,
+    #                       }
+    #           )
 
 
-    def summary_from_cache(summary_sentence: tuple) -> tuple:
-
-
+    # def summary_from_cache(summary_sentence: tuple) -> tuple:
+    #     with st.spinner("Summarizing in progress..."):
+    #         return tuple(summarizer.abstractive_summary(list(summary_sentence)))
 
     if summarize_button:
         output = pipeline(st.session_state.target_text)
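The block being commented out follows the legacy Streamlit pattern for caching functions that touch unhashable objects: st.cache with hash_funcs mapping torch parameters and tokenizer types to a constant, so Streamlit does not try to hash the model internals. A minimal sketch of that pattern is below, assuming a Streamlit version old enough to still ship st.cache (later superseded by st.cache_data / st.cache_resource); summarize_cached and the bare "summarization" pipeline are illustrative stand-ins, not this app's code.

import streamlit as st
from transformers import pipeline

# Load the model outside the cached function; the hash_funcs below keep
# legacy st.cache from trying to hash its weights and tokenizer objects.
summarization_pipeline = pipeline("summarization")


@st.cache(
    suppress_st_warning=True,
    show_spinner=False,
    allow_output_mutation=True,
    hash_funcs={
        "torch.nn.parameter.Parameter": lambda _: None,
        "tokenizers.Tokenizer": lambda _: None,
        "tokenizers.AddedToken": lambda _: None,
    },
)
def summarize_cached(text: str) -> str:
    # Identical inputs on a rerun return the cached summary instead of
    # invoking the model again.
    return summarization_pipeline(text)[0]["summary_text"]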