lossLopes committed on
Commit
5ca455e
·
1 Parent(s): 135d3c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -1
app.py CHANGED
@@ -56,7 +56,8 @@ iface = gr.Interface(
56
 
57
  # Launch the Gradio Interface
58
  iface.launch()
59
- """
 
60
  import gradio as gr
61
  import nltk
62
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
@@ -118,3 +119,66 @@ iface = gr.Interface(
118
  )
119
 
120
  iface.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Launch the Gradio Interface
58
  iface.launch()
59
+
60
+
61
  import gradio as gr
62
  import nltk
63
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
119
  )
120
 
121
  iface.launch(share=True)
122
+ """"
123
+
124
+ import gradio as gr
125
+ import nltk
126
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
127
+
128
# Fetch the Punkt sentence-tokenizer data used by nltk.tokenize.sent_tokenize.
# Newer NLTK releases look for the 'punkt_tab' resource instead of 'punkt';
# requesting both keeps the app working regardless of the installed version
# (nltk.download reports an unknown resource name without raising).
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)
129
+
130
def fragment_text(text, tokenizer):
    """Split *text* into sentence-aligned chunks sized for the tokenizer.

    Sentences are found with ``nltk.tokenize.sent_tokenize`` and packed
    greedily, in order, into chunks whose accumulated token count stays
    within ``tokenizer.max_len_single_sentence``.

    Args:
        text: The raw text to split.
        tokenizer: Object exposing ``tokenize(str)`` and the
            ``max_len_single_sentence`` attribute.

    Returns:
        A list of chunk strings; empty when *text* contains no sentences.
        A single sentence that on its own exceeds the limit is still
        returned as its own chunk (it is never split mid-sentence).
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    current_sentences = []
    current_tokens = 0

    for sentence in sentences:
        # Measure both sides of the comparison in tokens; the chunk's
        # character length is not comparable to the tokenizer's limit.
        # NOTE: per-sentence counts are summed, which approximates the
        # token count of the joined chunk.
        sentence_tokens = len(tokenizer.tokenize(sentence))

        # Only flush a non-empty chunk, so an over-long first sentence does
        # not produce an empty chunk.
        if current_sentences and current_tokens + sentence_tokens > max_len:
            chunks.append(" ".join(current_sentences).strip())
            current_sentences = []
            current_tokens = 0

        current_sentences.append(sentence)
        current_tokens += sentence_tokens

    if current_sentences:
        chunks.append(" ".join(current_sentences).strip())

    return chunks
152
+
153
+
154
def summarize_text(text, tokenizer, model):
    """Summarize *text* chunk by chunk and join the partial summaries.

    The text is split with ``fragment_text``; each chunk is encoded, passed
    to ``model.generate`` and decoded with special tokens removed.

    Args:
        text: The raw text to summarize.
        tokenizer: Tokenizer used to encode chunks and decode model output.
        model: Model exposing ``generate(**encoded_inputs)``.

    Returns:
        The per-chunk summaries joined by single spaces (an empty string
        when there are no chunks).
    """
    summaries = []
    for chunk in fragment_text(text, tokenizer):
        # truncation=True keeps a chunk that is still too long (for example
        # one very long sentence) within the tokenizer's maximum length
        # rather than handing an over-long sequence to the model.
        encoded = tokenizer(chunk, return_tensors='pt', truncation=True)
        generated = model.generate(**encoded)
        # One chunk is encoded per call, so the batch holds one sequence.
        summaries.append(tokenizer.decode(generated[0], skip_special_tokens=True))

    return " ".join(summaries)
166
+
167
# Model identifier passed to both from_pretrained calls below, so the
# tokenizer and the seq2seq model come from the same checkpoint.
checkpoint = "tclopess/bart_samsum"

# Loaded once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
170
+
171
def summarize_and_display(text):
    """Return the summary of *text*; used as the Gradio interface callback.

    Delegates to ``summarize_text`` with the module-level ``tokenizer`` and
    ``model``.
    """
    return summarize_text(text, tokenizer, model)
174
+
175
# Web UI: one text box in, one text box out, summarized on button click.
iface = gr.Interface(
    fn=summarize_and_display,
    inputs=gr.Textbox(label="Enter text to summarize:"),
    outputs=gr.Textbox(label="Summary:"),
    live=False,  # Run only when the button is clicked, not on every edit
    # `button` is not a gr.Interface parameter; the submit button's label is
    # set through `submit_btn`.
    submit_btn="Summarize",
    title="Text Summarizer with Button",
)

iface.launch(share=True)