tommasobaldi committed
Commit 6e3a821 · 1 parent: 795ee13

working on text splitting

Files changed (1): app.py (+3 -3)
app.py CHANGED
@@ -6,7 +6,6 @@ from transformers import pipeline, AutoTokenizer
 
 
 def main() -> None:
-    nltk.download("punkt")
     # header
     st.title(":bookmark_tabs: Terms Of Service Summarizer :bookmark_tabs:")
     st.markdown("The app aims to extract the main information from Terms Of Conditions, which are often too long and "
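The download removed here is re-added by the next hunk, next to the other setup calls. For context, `nltk.sent_tokenize` (used later in this file) depends on the Punkt model and raises a `LookupError` when it has not been downloaded, so the call only has to run before the first tokenization. A minimal sketch of that dependency (the sample text is invented):

```python
# Minimal sketch (sample text invented): sent_tokenize needs the Punkt
# model; without a prior nltk.download("punkt") it raises a LookupError.
import nltk

nltk.download("punkt")  # fetch the Punkt sentence-boundary models
sentences = nltk.sent_tokenize("First sentence. Second one.", language="english")
print(sentences)  # ['First sentence.', 'Second one.']
```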
@@ -70,6 +69,7 @@ def main() -> None:
             accumulated_lists.append(join_sentences(result_list))
         return accumulated_lists
 
+    nltk.download("punkt")
     pipe = create_pipeline()
     tokenizer = AutoTokenizer.from_pretrained("ML-unipi/bart-large-tos")
 
 
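The context lines above close a splitter whose body is not shown in this diff. As a rough sketch only: the diff exposes just the names `split_sentences_by_token_length`, `join_sentences`, and the `accumulated_lists` return, so the greedy packing and the parameter names below are guesses at the intent (chunking sentences to fit the model's token budget):

```python
# Hypothetical reconstruction: only the function names and the
# accumulated_lists return appear in the diff; the rest is a guess.
def join_sentences(sentences: list[str]) -> str:
    return " ".join(sentences)

def split_sentences_by_token_length(sentences: list[str], tokenizer, max_tokens: int = 512) -> list[str]:
    accumulated_lists: list[str] = []
    result_list: list[str] = []
    used = 0
    for sentence in sentences:
        n = len(tokenizer.encode(sentence))  # token count for this sentence
        if result_list and used + n > max_tokens:
            # current chunk is full: flush it and start a new one
            accumulated_lists.append(join_sentences(result_list))
            result_list, used = [], 0
        result_list.append(sentence)
        used += n
    if result_list:
        accumulated_lists.append(join_sentences(result_list))
    return accumulated_lists

# Assumed usage with the tokenizer loaded above:
# chunks = split_sentences_by_token_length(nltk.sent_tokenize(text), tokenizer, 512)
```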
@@ -94,7 +94,7 @@ def main() -> None:
     summarize_button = st.button(label="Try it!")
 
     if summarize_button:
-        if target_text_input is not "":
+        if target_text_input != "":
             summary_sentences = []
             with st.spinner("Summarizing in progress..."):
                 sentences = split_sentences_by_token_length(nltk.sent_tokenize(target_text_input, language="english"),
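The fix in this hunk matters because `is not` compares object identity, not value: `target_text_input is not ""` only behaved as intended thanks to CPython interning the empty string, and Python 3.8+ flags the pattern with a `SyntaxWarning`. A quick illustration (strings invented):

```python
# "is" tests identity, "==" tests value; comparing to a literal with "is"
# triggers on Python 3.8+: SyntaxWarning: "is not" with a literal.
a = "".join(["py", "thon"])  # builds a new "python" object at runtime
print(a == "python")  # True:  equal values
print(a is "python")  # False: distinct objects, despite equal values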
@@ -106,7 +106,7 @@ def main() -> None:
                     # st.markdown(str(len(token_list)))
                     output = pipe(sentence)
                     summary = output[0]["summary_text"]
-                    for line in summary.split("."):
+                    for line in summary.split(". "):
                         if line != "":
                             summary_sentences.append(line)
             display_summary(summary_sentences)
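Splitting on `". "` instead of `"."` keeps periods inside tokens such as prices or abbreviations from cutting a sentence apart, and the summary's trailing period no longer yields an empty last element. A comparison on invented sample text:

```python
# Invented sample: "." splits inside the price and leaves a trailing
# empty string; ". " only splits at sentence boundaries.
summary = "Fees may reach $1.50. No refunds."
print(summary.split("."))   # ['Fees may reach $1', '50', ' No refunds', '']
print(summary.split(". "))  # ['Fees may reach $1.50', 'No refunds.']
```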
 