tsantos committed on
Commit
14f2d8b
·
1 Parent(s): 0b524fb

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +6 -1
pipeline.py CHANGED
@@ -297,12 +297,17 @@ class Branch_Classifier(object):
297
  def format_data(self,data:list)-> list:
298
  try:
299
  X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
300
-
301
  ### Add Bigrams and keep only the good ones(pre-selected)
302
  X_bigrmas = self.phrase_bigrams[X]
 
 
303
  data_clean = self.clean_bigram(X_bigrmas)
 
304
  X_bigrams_clean = ' '.join(map(str, data_clean))
 
305
  pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
 
306
 
307
  except Exception as e:
308
  logging.exception("Error occurred while formatting and cleaning data" +" Info: " + str(e))
 
297
  def format_data(self,data:list)-> list:
298
  try:
299
  X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
300
+ print("\n1: ",X)
301
  ### Add Bigrams and keep only the good ones(pre-selected)
302
  X_bigrmas = self.phrase_bigrams[X]
303
+ print("\n2: ",X_bigrmas)
304
+
305
  data_clean = self.clean_bigram(X_bigrmas)
306
+ print("\n3: ",data_clean)
307
  X_bigrams_clean = ' '.join(map(str, data_clean))
308
+ print("\n4: ",X_bigrams_clean)
309
  pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
310
+ print("\n5: ",pre_processed)
311
 
312
  except Exception as e:
313
  logging.exception("Error occurred while formatting and cleaning data" +" Info: " + str(e))