ashhadahsan committed on
Commit
c1dd675
·
1 Parent(s): cc5f850

Update pages/1_📈_predict.py

Browse files
Files changed (1) hide show
  1. pages/1_📈_predict.py +129 -155
pages/1_📈_predict.py CHANGED
@@ -1,37 +1,36 @@
1
  import streamlit as st
2
  import pandas as pd
 
 
3
  from transformers import pipeline
4
  from stqdm import stqdm
5
  from simplet5 import SimpleT5
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
  from transformers import BertTokenizer, TFBertForSequenceClassification
8
- from datetime import datetime
9
  import logging
10
- from transformers import TextClassificationPipeline
11
- import gc
12
  from datasets import load_dataset
13
- from utils.openllmapi.api import ChatBot
14
- from utils.openllmapi.exceptions import *
15
- import time
16
  from typing import List
17
  from collections import OrderedDict
 
18
 
 
19
 
20
- tokenizer_kwargs = dict(
21
- max_length=128,
22
- truncation=True,
23
- padding=True,
24
- )
25
  SLEEP = 2
26
 
27
 
28
- def cleanMemory(obj: TextClassificationPipeline):
 
 
 
29
  del obj
30
  gc.collect()
31
 
32
 
33
  @st.cache_data
34
- def getAllCats():
35
  data = load_dataset("ashhadahsan/amazon_theme")
36
  data = data["train"].to_pandas()
37
  labels = [x for x in list(set(data.iloc[:, 1].values.tolist())) if x != "Unknown"]
@@ -40,108 +39,97 @@ def getAllCats():
40
 
41
 
42
  @st.cache_data
43
- def getAllSubCats():
44
- data = load_dataset("ashhadahsan/amazon_theme")
45
  data = data["train"].to_pandas()
46
  labels = [x for x in list(set(data.iloc[:, 1].values.tolist())) if x != "Unknown"]
47
  del data
48
  return labels
49
 
50
 
51
- def assignHF(bot, what: str, to: str, old: List):
52
- try:
53
- old = ", ".join(old)
54
- message_content = bot.chat(
55
- f"""'Assign a one-line {what} to this summary of the text of a review
56
- {to}
57
- already assigned themes are , {old}
58
- theme""",
59
- )
60
- try:
61
- return message_content.split(":")[1].strip()
62
- except:
63
- return message_content.strip()
64
- except ChatError:
65
- return ""
66
-
67
-
68
- def assignOpen(bot, what: str, to: str, old: List):
69
- old = ", ".join(old)
70
- template = """'Assign a one-line {what} to this summary of the text of a review
71
- {to}
72
- already assigned themes are , {old}
73
- theme"""
74
- prompt = PromptTemplate(template=template, input_variables=["what", "to", "old"])
75
- llm_chain = LLMChain(prompt=prompt, llm=bot)
76
- generated = llm_chain.run(what=what, to=summary, old=old)
77
- return generated
78
-
79
-
80
- @st.cache_resource
81
- def loadZeroShotClassification():
82
- classifierzero = pipeline(
83
- "zero-shot-classification", model="facebook/bart-large-mnli"
84
- )
85
- return classifierzero
86
-
87
-
88
  @st.cache_resource
89
- def loadopenModel():
90
- llm = OpenLLM(
91
- model_name="dolly-v2",
92
- model_id="databricks/dolly-v2-3b",
93
- temperature=0.94,
94
- repetition_penalty=1.2,
95
  )
96
- return llm
97
 
98
 
99
- def assignZeroShot(zero, to: str, old: List):
100
  assigned = zero(to, old)
101
- assigneddict = dict(zip(assigned["labels"], assigned["scores"]))
102
- od = OrderedDict(sorted(assigneddict.items(), key=lambda x: x[1], reverse=True))
103
  print(list(od.keys())[0])
104
  print(type(list(od.keys())[0]))
105
 
106
  return list(od.keys())[0]
107
 
108
 
109
- date = datetime.now().strftime(r"%Y-%m-%d")
 
 
 
 
 
 
 
 
110
 
111
 
112
  @st.cache_resource
113
  def load_t5() -> (AutoModelForSeq2SeqLM, AutoTokenizer):
114
- model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
 
 
115
 
116
- tokenizer = AutoTokenizer.from_pretrained("t5-base")
 
 
117
  return model, tokenizer
118
 
119
 
 
 
 
 
 
 
 
 
 
120
  @st.cache_resource
121
  def summarizationModel():
122
- return pipeline("summarization", model="my_awesome_sum/")
 
 
 
123
 
124
 
125
  @st.cache_resource
126
  def convert_df(df: pd.DataFrame):
127
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
128
  return df.to_csv(index=False).encode("utf-8")
129
 
130
 
131
  def load_one_line_summarizer(model):
132
- return model.load_model("t5", "snrspeaks/t5-one-line-summary")
 
 
 
133
 
134
 
135
  @st.cache_resource
136
  def classify_theme() -> TextClassificationPipeline:
137
  tokenizer = BertTokenizer.from_pretrained(
138
- "ashhadahsan/amazon-theme-bert-base-finetuned"
139
  )
140
  model = TFBertForSequenceClassification.from_pretrained(
141
- "ashhadahsan/amazon-theme-bert-base-finetuned"
142
  )
143
  pipeline = TextClassificationPipeline(
144
- model=model, tokenizer=tokenizer, top_k=1, **tokenizer_kwargs
 
 
145
  )
146
  return pipeline
147
 
@@ -149,46 +137,37 @@ def classify_theme() -> TextClassificationPipeline:
149
  @st.cache_resource
150
  def classify_sub_theme() -> TextClassificationPipeline:
151
  tokenizer = BertTokenizer.from_pretrained(
152
- "ashhadahsan/amazon-subtheme-bert-base-finetuned"
153
  )
154
  model = TFBertForSequenceClassification.from_pretrained(
155
- "ashhadahsan/amazon-subtheme-bert-base-finetuned"
156
  )
157
  pipeline = TextClassificationPipeline(
158
- model=model, tokenizer=tokenizer, top_k=1, **tokenizer_kwargs
159
  )
160
  return pipeline
161
 
162
 
163
  st.set_page_config(layout="wide", page_title="Amazon Review | Summarizer")
164
- st.title("Amazon Review Summarizer")
165
 
166
- uploaded_file = st.file_uploader("Choose a file", type=["xlsx", "xls", "csv"])
167
 
168
- # try:
169
- # bot = ChatBot(
170
- # cookies={
171
- # "hf-chat": st.secrets["hf-chat"],
172
- # "token": st.secrets["token"],
173
- # }
174
- # )
175
- # except ChatBotInitError as e:
176
- # print(e)
177
 
178
  summarizer_option = st.selectbox(
179
- "Select Summarizer",
180
- ("Custom trained on the dataset", "t5-base", "t5-one-line-summary"),
181
  )
182
- col1, col2, col3 = st.columns([1, 1, 1])
183
 
184
  with col1:
185
- summary_yes = st.checkbox("Summrization", value=False)
186
 
187
  with col2:
188
- classification = st.checkbox("Classify Category", value=True)
189
 
190
  with col3:
191
- sub_theme = st.checkbox("Sub theme classification", value=True)
192
 
193
  treshold = st.slider(
194
  label="Model Confidence value",
@@ -202,23 +181,22 @@ treshold = st.slider(
202
  ps = st.empty()
203
 
204
  if st.button("Process", type="primary"):
205
- themes = getAllCats()
206
- subthemes = getAllSubCats()
207
- # st.write(themes)
208
 
209
  oneline = SimpleT5()
210
  load_one_line_summarizer(model=oneline)
211
- zeroline = loadZeroShotClassification()
212
- bot = loadopenModel()
213
 
214
  cancel_button = st.empty()
215
  cancel_button2 = st.empty()
216
  cancel_button3 = st.empty()
217
  if uploaded_file is not None:
218
  if uploaded_file.name.split(".")[-1] in ["xls", "xlsx"]:
219
- df = pd.read_excel(uploaded_file, engine="openpyxl")
220
  if uploaded_file.name.split(".")[-1] in [".csv"]:
221
- df = pd.read_csv(uploaded_file)
222
  columns = df.columns.values.tolist()
223
  columns = [x.lower() for x in columns]
224
  df.columns = columns
@@ -234,7 +212,7 @@ if st.button("Process", type="primary"):
234
  progress_text = "Summarization in progress. Please wait."
235
  summary = []
236
 
237
- for x in stqdm(range(len(text))):
238
  if cancel_button.button("Cancel", key=x):
239
  del model
240
  break
@@ -256,28 +234,28 @@ if st.button("Process", type="primary"):
256
  classesUnlabel = []
257
  classesUnlabelZero = []
258
  for x in stqdm(
259
- text,
260
  desc="Assigning Themes ...",
261
  total=len(text),
262
  colour="#BF1A1A",
263
  ):
264
- output = themePipe(x)[0][0]["label"]
265
  classes.append(output)
266
- score = round(themePipe(x)[0][0]["score"], 2)
267
  if score <= treshold:
268
- onelineoutput = oneline.predict(x)[0]
269
- time.sleep(SLEEP)
270
  print("hit")
271
  classesUnlabel.append(
272
- assignOpen(
273
- bot=bot,
274
  what="theme",
275
  to=onelineoutput,
276
  old=themes,
277
  )
278
  )
279
  classesUnlabelZero.append(
280
- assignZeroShot(
281
  zero=zeroline, to=onelineoutput, old=themes
282
  )
283
  )
@@ -289,37 +267,35 @@ if st.button("Process", type="primary"):
289
  outputdf["Review Theme"] = classes
290
  outputdf["Review Theme-issue-new"] = classesUnlabel
291
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
292
- cleanMemory(themePipe)
293
  if sub_theme:
294
  subThemePipe = classify_sub_theme()
295
  classes = []
296
  classesUnlabel = []
297
  classesUnlabelZero = []
298
  for x in stqdm(
299
- text,
300
  desc="Assigning Subthemes ...",
301
  total=len(text),
302
  colour="green",
303
  ):
304
- output = subThemePipe(x)[0][0]["label"]
305
  classes.append(output)
306
- score = round(subThemePipe(x)[0][0]["score"], 2)
307
  if score <= treshold:
308
  onelineoutput = oneline.predict(x)[0]
309
 
310
- time.sleep(SLEEP)
311
-
312
  print("hit")
313
  classesUnlabel.append(
314
- assignOpen(
315
- bot=bot,
316
  what="subtheme",
317
  to=onelineoutput,
318
  old=subthemes,
319
  )
320
  )
321
  classesUnlabelZero.append(
322
- assignZeroShot(
323
  zero=zeroline,
324
  to=onelineoutput,
325
  old=subthemes,
@@ -334,7 +310,7 @@ if st.button("Process", type="primary"):
334
  outputdf["Review SubTheme-issue-new"] = classesUnlabel
335
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
336
 
337
- cleanMemory(subThemePipe)
338
 
339
  csv = convert_df(outputdf)
340
  st.download_button(
@@ -380,25 +356,24 @@ if st.button("Process", type="primary"):
380
  for x in stqdm(
381
  text, desc="Assigning Themes ...", total=len(text), colour="red"
382
  ):
383
- output = themePipe(x)[0][0]["label"]
384
  classes.append(output)
385
- score = round(themePipe(x)[0][0]["score"], 2)
386
  if score <= treshold:
387
  onelineoutput = oneline.predict(x)[0]
388
 
389
  print("hit")
390
- time.sleep(SLEEP)
391
 
392
  classesUnlabel.append(
393
- assignOpen(
394
- bot=bot,
395
  what="theme",
396
  to=onelineoutput,
397
  old=themes,
398
  )
399
  )
400
  classesUnlabelZero.append(
401
- assignZeroShot(
402
  zero=zeroline, to=onelineoutput, old=themes
403
  )
404
  )
@@ -409,7 +384,7 @@ if st.button("Process", type="primary"):
409
  outputdf["Review Theme"] = classes
410
  outputdf["Review Theme-issue-new"] = classesUnlabel
411
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
412
- cleanMemory(themePipe)
413
 
414
  if sub_theme:
415
  subThemePipe = classify_sub_theme()
@@ -422,24 +397,23 @@ if st.button("Process", type="primary"):
422
  total=len(text),
423
  colour="green",
424
  ):
425
- output = subThemePipe(x)[0][0]["label"]
426
  classes.append(output)
427
- score = round(subThemePipe(x)[0][0]["score"], 2)
428
  if score <= treshold:
429
  onelineoutput = oneline.predict(x)[0]
430
 
431
- time.sleep(SLEEP)
432
  print("hit")
433
  classesUnlabel.append(
434
- assignOpen(
435
- bot=bot,
436
  what="subtheme",
437
  to=onelineoutput,
438
  old=subthemes,
439
  )
440
  )
441
  classesUnlabelZero.append(
442
- assignZeroShot(
443
  zero=zeroline,
444
  to=onelineoutput,
445
  old=subthemes,
@@ -454,7 +428,7 @@ if st.button("Process", type="primary"):
454
  outputdf["Review SubTheme-issue-new"] = classesUnlabel
455
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
456
 
457
- cleanMemory(subThemePipe)
458
 
459
  csv = convert_df(outputdf)
460
  st.download_button(
@@ -471,12 +445,12 @@ if st.button("Process", type="primary"):
471
  load_one_line_summarizer(model=model)
472
 
473
  summary = []
474
- for x in stqdm(range(len(text))):
475
- if cancel_button3.button("Cancel", key=x):
476
  del model
477
  break
478
  try:
479
- summary.append(model.predict(text[x])[0])
480
  except:
481
  pass
482
  outputdf["summary"] = summary
@@ -488,27 +462,28 @@ if st.button("Process", type="primary"):
488
  classesUnlabel = []
489
  classesUnlabelZero = []
490
  for x in stqdm(
491
- text, desc="Assigning Themes ...", total=len(text), colour="red"
 
 
 
492
  ):
493
- output = themePipe(x)[0][0]["label"]
494
  classes.append(output)
495
- score = round(themePipe(x)[0][0]["score"], 2)
496
  if score <= treshold:
497
  onelineoutput = oneline.predict(x)[0]
498
 
499
- time.sleep(SLEEP)
500
-
501
  print("hit")
502
  classesUnlabel.append(
503
- assignOpen(
504
- bot=bot,
505
  what="theme",
506
  to=onelineoutput,
507
  old=themes,
508
  )
509
  )
510
  classesUnlabelZero.append(
511
- assignZeroShot(
512
  zero=zeroline, to=onelineoutput, old=themes
513
  )
514
  )
@@ -526,29 +501,28 @@ if st.button("Process", type="primary"):
526
  classesUnlabelZero = []
527
 
528
  for x in stqdm(
529
- text,
530
  desc="Assigning Subthemes ...",
531
  total=len(text),
532
  colour="green",
533
  ):
534
- output = subThemePipe(x)[0][0]["label"]
535
  classes.append(output)
536
- score = round(subThemePipe(x)[0][0]["score"], 2)
537
  if score <= treshold:
538
  print("hit")
539
- onelineoutput = oneline.predict(x)[0]
540
 
541
- time.sleep(SLEEP)
542
  classesUnlabel.append(
543
- assignOpen(
544
- bot=bot,
545
  what="subtheme",
546
  to=onelineoutput,
547
  old=subthemes,
548
  )
549
  )
550
  classesUnlabelZero.append(
551
- assignZeroShot(
552
  zero=zeroline,
553
  to=onelineoutput,
554
  old=subthemes,
@@ -563,7 +537,7 @@ if st.button("Process", type="primary"):
563
  outputdf["Review SubTheme-issue-new"] = classesUnlabel
564
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
565
 
566
- cleanMemory(subThemePipe)
567
 
568
  csv = convert_df(outputdf)
569
  st.download_button(
@@ -576,11 +550,11 @@ if st.button("Process", type="primary"):
576
 
577
  except KeyError as e:
578
  st.error(
579
- "Please Make sure that your data must have a column named text",
580
  icon="🚨",
581
  )
582
- st.info("Text column must have amazon reviews", icon="ℹ️")
583
- # st.exception(e)
584
 
585
  except BaseException as e:
586
- logging.exception("An exception was occurred")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ from transformers import BertTokenizer, TFBertForSequenceClassification
4
+ from transformers import TextClassificationPipeline
5
  from transformers import pipeline
6
  from stqdm import stqdm
7
  from simplet5 import SimpleT5
8
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
  from transformers import BertTokenizer, TFBertForSequenceClassification
 
10
  import logging
 
 
11
  from datasets import load_dataset
12
+ import gc
 
 
13
  from typing import List
14
  from collections import OrderedDict
15
+ from datetime import datetime
16
 
17
+ tokenizer_kwargs = dict(max_length=128, truncation=True, padding=True)
18
 
19
+
20
+ flan_t5_kwargs = dict(repetition_penalty=1.2)
 
 
 
21
  SLEEP = 2
22
 
23
 
24
+ date = datetime.now().strftime(r"%Y-%m-%d")
25
+
26
+
27
def clean_memory(obj: TextClassificationPipeline):
    """Unbind ``obj`` inside this function and run a garbage-collection pass.

    NOTE(review): ``del`` only removes this function's own reference; any
    reference still held by the caller keeps the object alive.
    """
    del obj
    gc.collect()
30
 
31
 
32
  @st.cache_data
33
+ def get_all_cats():
34
  data = load_dataset("ashhadahsan/amazon_theme")
35
  data = data["train"].to_pandas()
36
  labels = [x for x in list(set(data.iloc[:, 1].values.tolist())) if x != "Unknown"]
 
39
 
40
 
41
  @st.cache_data
42
+ def get_all_subcats():
43
+ data = load_dataset("ashhadahsan/amazon_subtheme")
44
  data = data["train"].to_pandas()
45
  labels = [x for x in list(set(data.iloc[:, 1].values.tolist())) if x != "Unknown"]
46
  del data
47
  return labels
48
 
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  @st.cache_resource
51
+ def load_zero_shot_classification_large():
52
+ classifier_zero = pipeline(
53
+ "zero-shot-classification",
54
+ model="facebook/bart-large-mnli",
 
 
55
  )
56
+ return classifier_zero
57
 
58
 
59
def assign_label_zeroshot(zero, to: str, old: List):
    """Return the candidate label that the zero-shot classifier scores highest.

    Args:
        zero: Callable invoked as ``zero(to, old)``. It must return a mapping
            with parallel ``"labels"`` and ``"scores"`` sequences.
        to: Text to classify.
        old: Candidate labels to choose from.

    Returns:
        The label paired with the highest score. If several labels share the
        top score, the one listed first wins.

    Raises:
        ValueError: If the classifier returns no labels.
    """
    assigned = zero(to, old)
    # One score per label; a repeated label keeps its last score.
    scores_by_label = dict(zip(assigned["labels"], assigned["scores"]))
    # A single max() pass replaces sorting every label just to read the first
    # entry, and the leftover debug prints are gone. max() keeps the first
    # maximum it sees, matching the stable descending sort used previously.
    return max(scores_by_label, key=scores_by_label.get)
67
 
68
 
69
def assign_labels_flant5(pipe, what: str, to: str, old: List):
    """Ask a text-generation pipeline to propose a new label for a review summary.

    Args:
        pipe: Callable invoked with the prompt string. It must return a
            sequence whose first item is a mapping with a ``"generated_text"``
            entry.
        what: Kind of label being requested (callers pass ``"theme"`` or
            ``"subtheme"``).
        to: One-line summary of the review, given to the model as context.
        old: Labels that already exist; they are listed in the prompt.

    Returns:
        The ``"generated_text"`` of the first pipeline result.
    """
    # List the labels passed by the caller. The prompt previously read the
    # module-level ``themes`` here, so sub-theme requests were shown the wrong
    # labels and the joined ``old`` value was never used.
    existing = ", ".join(old)
    prompt = f"""'Generate a new one word {what} to this summary of the text of a review
{to} for context
already assigned {what} are , {existing}
theme:"""
    return pipe(prompt)[0]["generated_text"]
78
 
79
 
80
  @st.cache_resource
81
  def load_t5() -> (AutoModelForSeq2SeqLM, AutoTokenizer):
82
+ model = AutoModelForSeq2SeqLM.from_pretrained(
83
+ "t5-base",
84
+ )
85
 
86
+ tokenizer = AutoTokenizer.from_pretrained(
87
+ pretrained_model_name_or_path="t5-base",
88
+ )
89
  return model, tokenizer
90
 
91
 
92
@st.cache_resource
def load_flan_t5_large():
    """Build the text2text-generation pipeline for ``google/flan-t5-large``.

    The module-level ``flan_t5_kwargs`` dict is forwarded as ``model_kwargs``.
    """
    pipeline_options = dict(
        task="text2text-generation",
        model="google/flan-t5-large",
        model_kwargs=flan_t5_kwargs,
    )
    return pipeline(**pipeline_options)
99
+
100
+
101
  @st.cache_resource
102
  def summarizationModel():
103
+ return pipeline(
104
+ task="summarization",
105
+ model="my_awesome_sum/",
106
+ )
107
 
108
 
109
  @st.cache_resource
110
  def convert_df(df: pd.DataFrame):
 
111
  return df.to_csv(index=False).encode("utf-8")
112
 
113
 
114
  def load_one_line_summarizer(model):
115
+ return model.load_model(
116
+ "t5",
117
+ "snrspeaks/t5-one-line-summary",
118
+ )
119
 
120
 
121
  @st.cache_resource
122
  def classify_theme() -> TextClassificationPipeline:
123
  tokenizer = BertTokenizer.from_pretrained(
124
+ "ashhadahsan/amazon-theme-bert-base-finetuned",
125
  )
126
  model = TFBertForSequenceClassification.from_pretrained(
127
+ "ashhadahsan/amazon-theme-bert-base-finetuned",
128
  )
129
  pipeline = TextClassificationPipeline(
130
+ model=model,
131
+ tokenizer=tokenizer,
132
+ **tokenizer_kwargs,
133
  )
134
  return pipeline
135
 
 
137
  @st.cache_resource
138
  def classify_sub_theme() -> TextClassificationPipeline:
139
  tokenizer = BertTokenizer.from_pretrained(
140
+ "ashhadahsan/amazon-subtheme-bert-base-finetuned",
141
  )
142
  model = TFBertForSequenceClassification.from_pretrained(
143
+ "ashhadahsan/amazon-subtheme-bert-base-finetuned",
144
  )
145
  pipeline = TextClassificationPipeline(
146
+ model=model, tokenizer=tokenizer, **tokenizer_kwargs
147
  )
148
  return pipeline
149
 
150
 
151
  st.set_page_config(layout="wide", page_title="Amazon Review | Summarizer")
152
+ st.title(body="Amazon Review Summarizer")
153
 
154
+ uploaded_file = st.file_uploader(label="Choose a file", type=["xlsx", "xls", "csv"])
155
 
 
 
 
 
 
 
 
 
 
156
 
157
  summarizer_option = st.selectbox(
158
+ label="Select Summarizer",
159
+ options=("Custom trained on the dataset", "t5-base", "t5-one-line-summary"),
160
  )
161
+ col1, col2, col3 = st.columns(spec=[1, 1, 1])
162
 
163
  with col1:
164
+ summary_yes = st.checkbox(label="Summrization", value=False)
165
 
166
  with col2:
167
+ classification = st.checkbox(label="Classify Category", value=True)
168
 
169
  with col3:
170
+ sub_theme = st.checkbox(label="Sub theme classification", value=True)
171
 
172
  treshold = st.slider(
173
  label="Model Confidence value",
 
181
  ps = st.empty()
182
 
183
  if st.button("Process", type="primary"):
184
+ themes = get_all_cats()
185
+ subthemes = get_all_subcats()
 
186
 
187
  oneline = SimpleT5()
188
  load_one_line_summarizer(model=oneline)
189
+ zeroline = load_zero_shot_classification_large()
190
+ bot = load_flan_t5_large()
191
 
192
  cancel_button = st.empty()
193
  cancel_button2 = st.empty()
194
  cancel_button3 = st.empty()
195
  if uploaded_file is not None:
196
  if uploaded_file.name.split(".")[-1] in ["xls", "xlsx"]:
197
+ df = pd.read_excel(io=uploaded_file, engine="openpyxl")
198
  if uploaded_file.name.split(".")[-1] in [".csv"]:
199
+ df = pd.read_csv(filepath_or_buffer=uploaded_file)
200
  columns = df.columns.values.tolist()
201
  columns = [x.lower() for x in columns]
202
  df.columns = columns
 
212
  progress_text = "Summarization in progress. Please wait."
213
  summary = []
214
 
215
+ for x in stqdm(iterable=range(len(text))):
216
  if cancel_button.button("Cancel", key=x):
217
  del model
218
  break
 
234
  classesUnlabel = []
235
  classesUnlabelZero = []
236
  for x in stqdm(
237
+ iterable=text,
238
  desc="Assigning Themes ...",
239
  total=len(text),
240
  colour="#BF1A1A",
241
  ):
242
+ output = themePipe(x)[0]["label"]
243
  classes.append(output)
244
+ score = round(number=themePipe(x)[0]["score"], ndigits=2)
245
  if score <= treshold:
246
+ onelineoutput = oneline.predict(source_text=x)[0]
247
+
248
  print("hit")
249
  classesUnlabel.append(
250
+ assign_labels_flant5(
251
+ bot,
252
  what="theme",
253
  to=onelineoutput,
254
  old=themes,
255
  )
256
  )
257
  classesUnlabelZero.append(
258
+ assign_label_zeroshot(
259
  zero=zeroline, to=onelineoutput, old=themes
260
  )
261
  )
 
267
  outputdf["Review Theme"] = classes
268
  outputdf["Review Theme-issue-new"] = classesUnlabel
269
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
270
+ clean_memory(themePipe)
271
  if sub_theme:
272
  subThemePipe = classify_sub_theme()
273
  classes = []
274
  classesUnlabel = []
275
  classesUnlabelZero = []
276
  for x in stqdm(
277
+ iterable=text,
278
  desc="Assigning Subthemes ...",
279
  total=len(text),
280
  colour="green",
281
  ):
282
+ output = subThemePipe(x)[0]["label"]
283
  classes.append(output)
284
+ score = round(subThemePipe(x)[0]["score"], 2)
285
  if score <= treshold:
286
  onelineoutput = oneline.predict(x)[0]
287
 
 
 
288
  print("hit")
289
  classesUnlabel.append(
290
+ assign_labels_flant5(
291
+ bot,
292
  what="subtheme",
293
  to=onelineoutput,
294
  old=subthemes,
295
  )
296
  )
297
  classesUnlabelZero.append(
298
+ assign_label_zeroshot(
299
  zero=zeroline,
300
  to=onelineoutput,
301
  old=subthemes,
 
310
  outputdf["Review SubTheme-issue-new"] = classesUnlabel
311
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
312
 
313
+ clean_memory(subThemePipe)
314
 
315
  csv = convert_df(outputdf)
316
  st.download_button(
 
356
  for x in stqdm(
357
  text, desc="Assigning Themes ...", total=len(text), colour="red"
358
  ):
359
+ output = themePipe(x)[0]["label"]
360
  classes.append(output)
361
+ score = round(themePipe(x)[0]["score"], 2)
362
  if score <= treshold:
363
  onelineoutput = oneline.predict(x)[0]
364
 
365
  print("hit")
 
366
 
367
  classesUnlabel.append(
368
+ assign_labels_flant5(
369
+ bot,
370
  what="theme",
371
  to=onelineoutput,
372
  old=themes,
373
  )
374
  )
375
  classesUnlabelZero.append(
376
+ assign_label_zeroshot(
377
  zero=zeroline, to=onelineoutput, old=themes
378
  )
379
  )
 
384
  outputdf["Review Theme"] = classes
385
  outputdf["Review Theme-issue-new"] = classesUnlabel
386
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
387
+ clean_memory(themePipe)
388
 
389
  if sub_theme:
390
  subThemePipe = classify_sub_theme()
 
397
  total=len(text),
398
  colour="green",
399
  ):
400
+ output = subThemePipe(x)[0]["label"]
401
  classes.append(output)
402
+ score = round(subThemePipe(x)[0]["score"], 2)
403
  if score <= treshold:
404
  onelineoutput = oneline.predict(x)[0]
405
 
 
406
  print("hit")
407
  classesUnlabel.append(
408
+ assign_labels_flant5(
409
+ bot,
410
  what="subtheme",
411
  to=onelineoutput,
412
  old=subthemes,
413
  )
414
  )
415
  classesUnlabelZero.append(
416
+ assign_label_zeroshot(
417
  zero=zeroline,
418
  to=onelineoutput,
419
  old=subthemes,
 
428
  outputdf["Review SubTheme-issue-new"] = classesUnlabel
429
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
430
 
431
+ clean_memory(subThemePipe)
432
 
433
  csv = convert_df(outputdf)
434
  st.download_button(
 
445
  load_one_line_summarizer(model=model)
446
 
447
  summary = []
448
+ for x in stqdm(iterable=range(len(text))):
449
+ if cancel_button3.button(label="Cancel", key=x):
450
  del model
451
  break
452
  try:
453
+ summary.append(model.predict(source_text=text[x])[0])
454
  except:
455
  pass
456
  outputdf["summary"] = summary
 
462
  classesUnlabel = []
463
  classesUnlabelZero = []
464
  for x in stqdm(
465
+ iterable=text,
466
+ desc="Assigning Themes ...",
467
+ total=len(text),
468
+ colour="red",
469
  ):
470
+ output = themePipe(x)[0]["label"]
471
  classes.append(output)
472
+ score = round(number=themePipe(x)[0]["score"], ndigits=2)
473
  if score <= treshold:
474
  onelineoutput = oneline.predict(x)[0]
475
 
 
 
476
  print("hit")
477
  classesUnlabel.append(
478
+ assign_labels_flant5(
479
+ bot,
480
  what="theme",
481
  to=onelineoutput,
482
  old=themes,
483
  )
484
  )
485
  classesUnlabelZero.append(
486
+ assign_label_zeroshot(
487
  zero=zeroline, to=onelineoutput, old=themes
488
  )
489
  )
 
501
  classesUnlabelZero = []
502
 
503
  for x in stqdm(
504
+ iterable=text,
505
  desc="Assigning Subthemes ...",
506
  total=len(text),
507
  colour="green",
508
  ):
509
+ output = subThemePipe(x)[0]["label"]
510
  classes.append(output)
511
+ score = round(subThemePipe(x)[0]["score"], 2)
512
  if score <= treshold:
513
  print("hit")
514
+ onelineoutput = oneline.predict(source_text=x)[0]
515
 
 
516
  classesUnlabel.append(
517
+ assign_labels_flant5(
518
+ bot,
519
  what="subtheme",
520
  to=onelineoutput,
521
  old=subthemes,
522
  )
523
  )
524
  classesUnlabelZero.append(
525
+ assign_label_zeroshot(
526
  zero=zeroline,
527
  to=onelineoutput,
528
  old=subthemes,
 
537
  outputdf["Review SubTheme-issue-new"] = classesUnlabel
538
  outputdf["Review SubTheme-issue-zero"] = classesUnlabelZero
539
 
540
+ clean_memory(subThemePipe)
541
 
542
  csv = convert_df(outputdf)
543
  st.download_button(
 
550
 
551
  except KeyError as e:
552
  st.error(
553
+ body="Please Make sure that your data must have a column named text",
554
  icon="🚨",
555
  )
556
+ st.info(body="Text column must have amazon reviews", icon="ℹ️")
557
+ st.exception(e)
558
 
559
  except BaseException as e:
560
+ logging.exception(msg="An exception was occurred")