tommasobaldi committed on
Commit
7360b2b
·
1 Parent(s): 530ce31

working on text splitting

Browse files
Files changed (1) hide show
  1. app.py +17 -15
app.py CHANGED
@@ -99,19 +99,19 @@ def main() -> None:
99
 
100
  def split_text(text: str) -> list:
101
  sentences = sent_tokenize(text)
102
- token_count = 0
103
- text_block = ""
104
- result = []
105
- for sentence in sentences:
106
- tokens = word_tokenize(sentence)
107
- if token_count + len(tokens) < 500:
108
- token_count += len(tokens)
109
- text_block += " ".join(sentence)
110
- else:
111
- result.append(text_block)
112
- text_block = "".join(sentence)
113
- token_count = len(tokens)
114
- return result
115
 
116
  pipe = create_pipeline()
117
 
@@ -120,8 +120,10 @@ def main() -> None:
120
  with st.spinner("Summarizing in progress..."):
121
  sentences = split_text(target_text_input)
122
  for sentence in sentences:
123
- output = pipe(sentence)
124
- st.markdown(output["summary_text"])
 
 
125
 
126
 
127
 
 
99
 
100
  def split_text(text: str) -> list:
101
  sentences = sent_tokenize(text)
102
+ # token_count = 0
103
+ # text_block = ""
104
+ # result = []
105
+ # for sentence in sentences:
106
+ # tokens = word_tokenize(sentence)
107
+ # if token_count + len(tokens) < 500:
108
+ # token_count += len(tokens)
109
+ # text_block += " ".join(sentence)
110
+ # else:
111
+ # result.append(text_block)
112
+ # text_block = "".join(sentence)
113
+ # token_count = len(tokens)
114
+ return sentences
115
 
116
  pipe = create_pipeline()
117
 
 
120
  with st.spinner("Summarizing in progress..."):
121
  sentences = split_text(target_text_input)
122
  for sentence in sentences:
123
+ st.text(sentence)
124
+ st.line_chart
125
+ #output = pipe(sentence)
126
+ #st.markdown(output["summary_text"])
127
 
128
 
129