Spaces:
Runtime error
Runtime error
Commit
·
795ee13
1
Parent(s):
c1aef33
working on text splitting
Browse files
app.py
CHANGED
@@ -58,13 +58,14 @@ def main() -> None:
|
|
58 |
# token_list = [token for token in nltk.word_tokenize(sentence)]
|
59 |
token_list = tokenizer(sentence, max_length=1024, truncation=True)
|
60 |
token_length = len(token_list["input_ids"])
|
61 |
-
if token_length
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
68 |
if result_list:
|
69 |
accumulated_lists.append(join_sentences(result_list))
|
70 |
return accumulated_lists
|
|
|
58 |
# token_list = [token for token in nltk.word_tokenize(sentence)]
|
59 |
token_list = tokenizer(sentence, max_length=1024, truncation=True)
|
60 |
token_length = len(token_list["input_ids"])
|
61 |
+
if token_length > 10:
|
62 |
+
if token_length + cumulative_token_length > split_token_length and result_list:
|
63 |
+
accumulated_lists.append(join_sentences(result_list))
|
64 |
+
result_list = [sentence]
|
65 |
+
cumulative_token_length = token_length
|
66 |
+
else:
|
67 |
+
result_list.append(sentence)
|
68 |
+
cumulative_token_length += token_length
|
69 |
if result_list:
|
70 |
accumulated_lists.append(join_sentences(result_list))
|
71 |
return accumulated_lists
|