Spaces:
Sleeping
Sleeping
seanpedrickcase
committed on
Commit
•
49679bb
1
Parent(s):
6417426
Couple of changes to requirements
Browse files
- app.py +53 -35
- requirements.txt +2 -0
app.py
CHANGED
@@ -150,12 +150,12 @@ model_type = "Flan T5 Large Stacked Samsum 1k"
|
|
150 |
load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
|
151 |
|
152 |
model_type = "Long T5 Global Base 16k Book Summary"
|
153 |
-
load_model(model_type,
|
154 |
|
155 |
today = datetime.now().strftime("%d%m%Y")
|
156 |
today_rev = datetime.now().strftime("%Y%m%d")
|
157 |
|
158 |
-
def summarise_text(text, text_df, length_slider, in_colname, model_type):
|
159 |
|
160 |
if text_df.empty:
|
161 |
in_colname="text"
|
@@ -164,15 +164,30 @@ def summarise_text(text, text_df, length_slider, in_colname, model_type):
|
|
164 |
in_text_df = pd.DataFrame({in_colname_list_first:[text]})
|
165 |
|
166 |
else:
|
167 |
-
in_text_df = text_df
|
168 |
-
in_colname_list_first = in_colname
|
169 |
|
170 |
print(model_type)
|
171 |
|
|
|
|
|
172 |
if model_type != "Mistral Nous Capybara 4k (larger, slow)":
|
173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
-
|
|
|
|
|
|
|
|
|
176 |
|
177 |
if model_type == "Mistral Nous Capybara 4k (larger, slow)":
|
178 |
|
@@ -180,47 +195,52 @@ def summarise_text(text, text_df, length_slider, in_colname, model_type):
|
|
180 |
|
181 |
from chatfuncs.prompts import nous_capybara_prompt
|
182 |
|
183 |
-
|
184 |
-
#formatted_string = open_hermes_prompt.format(length=length, text=text)
|
185 |
|
186 |
-
|
187 |
|
188 |
-
|
189 |
-
for output in chatf.model(formatted_string, max_length = 10000):#, stream=True):
|
190 |
-
print(output, end="", flush=True)
|
191 |
|
192 |
-
|
|
|
193 |
|
194 |
-
|
195 |
-
|
196 |
|
197 |
-
|
198 |
-
if index != -1:
|
199 |
-
# Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
|
200 |
-
start_index = index + len('ASSISTANT: ')
|
201 |
-
|
202 |
-
# Slice the string from this point to the end
|
203 |
-
assistant_text = output_str[start_index:]
|
204 |
-
else:
|
205 |
-
assistant_text = "ASSISTANT: not found in text"
|
206 |
|
207 |
-
|
208 |
|
209 |
-
|
|
|
210 |
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
if text_df.empty:
|
216 |
if model_type != "Mistral Nous Capybara 4k (larger, slow)":
|
217 |
-
summarised_text_out =
|
218 |
|
219 |
if model_type == "Mistral Nous Capybara 4k (larger, slow)":
|
220 |
-
summarised_text_out =
|
221 |
|
222 |
else:
|
223 |
-
summarised_text_out = [d['summary_text'] for d in
|
224 |
|
225 |
output_name = "summarise_output_" + today_rev + ".csv"
|
226 |
output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
|
@@ -253,10 +273,8 @@ with block:
|
|
253 |
in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)
|
254 |
|
255 |
with gr.Accordion("Summarise open text from a file", open = False):
|
256 |
-
in_text_df = gr.File(label="Input text from file")
|
257 |
-
in_colname = gr.
|
258 |
-
type="numpy", row_count=(1,"fixed"), col_count = (1,"fixed"),
|
259 |
-
headers=["Open text column name"])#, "Address column name 2", "Address column name 3", "Address column name 4"])
|
260 |
|
261 |
with gr.Row():
|
262 |
summarise_btn = gr.Button("Summarise")
|
|
|
150 |
load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
|
151 |
|
152 |
model_type = "Long T5 Global Base 16k Book Summary"
|
153 |
+
load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
|
154 |
|
155 |
today = datetime.now().strftime("%d%m%Y")
|
156 |
today_rev = datetime.now().strftime("%Y%m%d")
|
157 |
|
158 |
+
def summarise_text(text, text_df, length_slider, in_colname, model_type, progress=gr.Progress()):
|
159 |
|
160 |
if text_df.empty:
|
161 |
in_colname="text"
|
|
|
164 |
in_text_df = pd.DataFrame({in_colname_list_first:[text]})
|
165 |
|
166 |
else:
|
167 |
+
in_text_df = text_df
|
168 |
+
in_colname_list_first = in_colname
|
169 |
|
170 |
print(model_type)
|
171 |
|
172 |
+
texts_list = list(in_text_df[in_colname_list_first])
|
173 |
+
|
174 |
if model_type != "Mistral Nous Capybara 4k (larger, slow)":
|
175 |
+
summarised_texts = []
|
176 |
+
|
177 |
+
for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
|
178 |
+
summarised_text = chatf.model(single_text, max_length=length_slider)
|
179 |
+
|
180 |
+
#print(summarised_text)
|
181 |
+
|
182 |
+
summarised_text_str = summarised_text[0]['summary_text']
|
183 |
+
|
184 |
+
summarised_texts.append(summarised_text_str)
|
185 |
|
186 |
+
print(summarised_text_str)
|
187 |
+
|
188 |
+
#pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
|
189 |
+
|
190 |
+
#print(summarised_texts)
|
191 |
|
192 |
if model_type == "Mistral Nous Capybara 4k (larger, slow)":
|
193 |
|
|
|
195 |
|
196 |
from chatfuncs.prompts import nous_capybara_prompt
|
197 |
|
198 |
+
summarised_texts = []
|
|
|
199 |
|
200 |
+
for single_text in progress.tqdm(texts_list, desc = "Summarising texts", unit = "texts"):
|
201 |
|
202 |
+
formatted_string = nous_capybara_prompt.format(length=length, text=single_text)
|
|
|
|
|
203 |
|
204 |
+
# print(formatted_string)
|
205 |
+
output = chatf.model(formatted_string, max_length = 10000)
|
206 |
|
207 |
+
#for output in chatf.model(formatted_string, max_length = 10000):#, stream=True):
|
208 |
+
# print(output, end="", flush=True)
|
209 |
|
210 |
+
print(output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
+
output_str = output[0]['generated_text']
|
213 |
|
214 |
+
# Find the index of 'ASSISTANT: ' to select only text after this location
|
215 |
+
index = output_str.find('ASSISTANT: ')
|
216 |
|
217 |
+
# Check if 'ASSISTANT: ' is found in the string
|
218 |
+
if index != -1:
|
219 |
+
# Add the length of 'ASSISTANT: ' to the index to start from the end of this substring
|
220 |
+
start_index = index + len('ASSISTANT: ')
|
221 |
+
|
222 |
+
# Slice the string from this point to the end
|
223 |
+
assistant_text = output_str[start_index:]
|
224 |
+
else:
|
225 |
+
assistant_text = "ASSISTANT: not found in text"
|
226 |
|
227 |
+
print(assistant_text)
|
228 |
+
|
229 |
+
summarised_texts.append(assistant_text)
|
230 |
+
|
231 |
+
#print(summarised_text)
|
232 |
+
|
233 |
+
#pd.Series(summarised_texts).to_csv("summarised_texts_out.csv")
|
234 |
|
235 |
if text_df.empty:
|
236 |
if model_type != "Mistral Nous Capybara 4k (larger, slow)":
|
237 |
+
summarised_text_out = summarised_texts[0]#.values()
|
238 |
|
239 |
if model_type == "Mistral Nous Capybara 4k (larger, slow)":
|
240 |
+
summarised_text_out = summarised_texts[0]
|
241 |
|
242 |
else:
|
243 |
+
summarised_text_out = summarised_texts #[d['summary_text'] for d in summarised_texts] #summarised_text[0].values()
|
244 |
|
245 |
output_name = "summarise_output_" + today_rev + ".csv"
|
246 |
output_df = pd.DataFrame({"Original text":in_text_df[in_colname_list_first],
|
|
|
273 |
in_text = gr.Textbox(label="Copy and paste your open text here", lines = 5)
|
274 |
|
275 |
with gr.Accordion("Summarise open text from a file", open = False):
|
276 |
+
in_text_df = gr.File(label="Input text from file", file_count='multiple')
|
277 |
+
in_colname = gr.Dropdown(label="Write the column name for the open text to summarise")
|
|
|
|
|
278 |
|
279 |
with gr.Row():
|
280 |
summarise_btn = gr.Button("Summarise")
|
requirements.txt
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
gradio==3.50.0
|
2 |
transformers
|
3 |
torch
|
|
|
|
|
4 |
ctransformers[cuda]
|
|
|
1 |
gradio==3.50.0
|
2 |
transformers
|
3 |
torch
|
4 |
+
pyarrow
|
5 |
+
openpyxl
|
6 |
ctransformers[cuda]
|