Spaces:
Runtime error
Runtime error
Commit
·
69f90b2
1
Parent(s):
92212fb
working on text splitting
Browse files
app.py
CHANGED
@@ -98,20 +98,21 @@ def main() -> None:
|
|
98 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
99 |
|
100 |
def split_text(text: str) -> list:
|
101 |
-
sentences = sent_tokenize(text)
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
|
|
115 |
|
116 |
pipe = create_pipeline()
|
117 |
|
|
|
98 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
99 |
|
100 |
def split_text(text: str, max_tokens: int = 500) -> list:
    """Split *text* into blocks of whole sentences under a token budget.

    Sentences are detected with ``sent_tokenize`` and their length is
    measured with ``word_tokenize``. Consecutive sentences are packed into
    the same block while the running token count stays below *max_tokens*;
    the sentence that would reach the limit starts a new block.

    Args:
        text: The text to split.
        max_tokens: Exclusive upper bound on the token count of a block.
            A single sentence that is itself at or over the limit is kept
            whole in a block of its own rather than being cut.

    Returns:
        A list of strings, each holding one or more complete sentences
        joined by a single space. Empty when *text* has no sentences.
    """
    result = []
    block_sentences = []
    token_count = 0
    for sentence in sent_tokenize(text, language="english"):
        n_tokens = len(
            word_tokenize(sentence, language="english", preserve_line=True)
        )
        # Flush only a non-empty block, so an oversized first sentence does
        # not push an empty string into the result.
        if block_sentences and token_count + n_tokens >= max_tokens:
            result.append(" ".join(block_sentences))
            block_sentences = []
            token_count = 0
        # Collect whole sentences; joining the list (not the sentence string
        # itself) keeps the words intact and separates sentences by a space.
        block_sentences.append(sentence)
        token_count += n_tokens
    # The trailing block never hits the limit check inside the loop, so it
    # has to be emitted here or the end of the text would be lost.
    if block_sentences:
        result.append(" ".join(block_sentences))
    return result
|
116 |
|
117 |
pipe = create_pipeline()
|
118 |
|