Commit: cb047cb
Parent(s): eda5d31

add summary cleaning function
app.py
CHANGED
@@ -25,10 +25,19 @@ def main() -> None:
     tos_pipeline = pipeline(task="summarization",
                             model="ML-unipi/bart-large-tos",
                             tokenizer="ML-unipi/bart-large-tos",
-                            device=0
                             )
     return tos_pipeline

+def clean_summaries(text: str) -> list:
+    result = []
+    lines = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
+    for line in lines:
+        if line.find(".") != -1:
+            line = line.replace("..", ".")
+            result.append(line)
+    return result
+
+
 def display_summary(summary_sentences: list) -> None:
     st.subheader("Summary :male-detective:")
     for sentence in summary_sentences:
@@ -56,7 +65,6 @@ def main() -> None:
     cumulative_token_length = 0

     for sentence in sentences:
-        # token_list = [token for token in nltk.word_tokenize(sentence)]
         token_list = tokenizer(sentence, max_length=1024, truncation=True)
         token_length = len(token_list["input_ids"])
         if token_length > 10:
@@ -103,16 +111,9 @@ def main() -> None:
         split_token_length=1024
     )
     for sentence in sentences:
-        # token_list = [token for token in nltk.word_tokenize(sentence)]
-        # st.markdown(sentence)
-        # st.markdown(str(len(token_list)))
         output = pipe(sentence)
         summary = output[0]["summary_text"]
-
-        for line in re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', summary):
-            if line.find(".") != -1:
-                line = line.replace("..", ".")
-                summary_sentences.append(line)
+        summary_sentences += clean_summaries(summary)
     display_summary(summary_sentences)
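For reference, a minimal standalone sketch of what the new clean_summaries helper does: the function body is copied from the diff above (with the append kept under the if, as rendered there), while the sample input string is made up purely for illustration.

import re


def clean_summaries(text: str) -> list:
    # Split the summary on sentence boundaries (a period or question mark
    # followed by whitespace, skipping common abbreviation patterns), keep
    # only pieces that contain a period, and collapse ".." into ".".
    result = []
    lines = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)
    for line in lines:
        if line.find(".") != -1:
            line = line.replace("..", ".")
            result.append(line)
    return result


if __name__ == "__main__":
    sample = "You grant us a license to your content.. We may suspend accounts. stray fragment"
    print(clean_summaries(sample))
    # ['You grant us a license to your content.', 'We may suspend accounts.']

The other change in this commit drops device=0 from the pipeline() call; in transformers, device=0 pins the pipeline to the first CUDA device, while omitting the argument falls back to the library default (CPU unless otherwise configured), which presumably avoids requiring a GPU on the Space.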