tommasobaldi committed on
Commit
b775d33
·
1 Parent(s): bf8859b

working on text splitting

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -54,7 +54,7 @@ def main() -> None:
54
  result_list = []
55
  cumulative_token_length = 0
56
  for sentence in sentences:
57
- token_list = [token for token in nltk.word_tokenize(sentence) if token not in ['.']]
58
  token_length = len(token_list)
59
  if token_length + cumulative_token_length > split_token_length and result_list:
60
  accumulated_lists.append(join_sentences(result_list))
 
54
  result_list = []
55
  cumulative_token_length = 0
56
  for sentence in sentences:
57
+ token_list = [token for token in nltk.word_tokenize(sentence)]
58
  token_length = len(token_list)
59
  if token_length + cumulative_token_length > split_token_length and result_list:
60
  accumulated_lists.append(join_sentences(result_list))