Spaces:
Runtime error
Runtime error
Commit
·
b21075f
1
Parent(s):
2d14981
update requirements.txt
Browse files
app.py
CHANGED
@@ -3,6 +3,8 @@ import os
|
|
3 |
from typing import AnyStr
|
4 |
|
5 |
import nltk
|
|
|
|
|
6 |
import streamlit as st
|
7 |
import validators
|
8 |
from transformers import pipeline
|
@@ -96,20 +98,20 @@ def main() -> None:
|
|
96 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
97 |
|
98 |
def split_text(text: str) -> list:
|
99 |
-
|
100 |
-
sentences = []
|
101 |
token_count = 0
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
107 |
else:
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
return sentences
|
113 |
|
114 |
pipe = create_pipeline()
|
115 |
|
|
|
3 |
from typing import AnyStr
|
4 |
|
5 |
import nltk
|
6 |
+
from nltk.tokenize import sent_tokenize
|
7 |
+
from nltk.tokenize import word_tokenize
|
8 |
import streamlit as st
|
9 |
import validators
|
10 |
from transformers import pipeline
|
|
|
98 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
99 |
|
100 |
def split_text(text: str, max_tokens: int = 1024) -> list:
    """Split *text* into blocks of whole sentences under a token budget.

    Sentences are detected with ``sent_tokenize`` and measured with
    ``word_tokenize``. Consecutive sentences are packed into one block for
    as long as the block's running token count stays strictly below
    ``max_tokens``; the sentence that would reach the budget starts a new
    block.

    Args:
        text: The raw text to split.
        max_tokens: Exclusive upper bound on the number of word tokens per
            block. Defaults to 1024, the limit previously hard-coded here.

    Returns:
        A list of text blocks in original order, each made of one or more
        sentences joined by a single space. Empty input yields ``[]``. A
        single sentence that alone reaches the budget is kept as its own
        (oversized) block rather than being dropped or cut mid-sentence.
    """
    result = []
    block_sentences = []
    token_count = 0
    for sentence in sent_tokenize(text):
        sentence_tokens = len(word_tokenize(sentence))
        # Flush only a non-empty block, so an oversized first sentence does
        # not push an empty string into the result.
        if block_sentences and token_count + sentence_tokens >= max_tokens:
            result.append(" ".join(block_sentences))
            block_sentences = []
            token_count = 0
        # Collect whole sentences; joining the list later avoids the
        # character-by-character join that str.join performs on a string.
        block_sentences.append(sentence)
        token_count += sentence_tokens
    # The trailing block never hits the flush branch above, so emit it here;
    # otherwise short texts would produce no output at all.
    if block_sentences:
        result.append(" ".join(block_sentences))
    return result
|
|
|
115 |
|
116 |
pipe = create_pipeline()
|
117 |
|