tsantos committed on
Commit
b44729f
·
1 Parent(s): 1b6edd2

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +1 -6
pipeline.py CHANGED
@@ -23,6 +23,7 @@ import nltk
23
  nltk.download('punkt')
24
  nltk.download('stopwords')
25
  nltk.download('wordnet')
 
26
  import html
27
 
28
  from config import config_file
@@ -297,21 +298,15 @@ class Branch_Classifier(object):
297
  def format_data(self,data:list)-> list:
298
  try:
299
  X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
300
- print("\n1: ",X)
301
  ### Add Bigrams and keep only the good ones(pre-selected)
302
  X_bigrmas = self.phrase_bigrams[X]
303
- print("\n2: ",X_bigrmas)
304
 
305
  data_clean = self.clean_bigram(X_bigrmas)
306
- print("\n3: ",data_clean)
307
  X_bigrams_clean = ' '.join(map(str, data_clean))
308
- print("\n4: ",X_bigrams_clean)
309
  pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
310
- print("\n5: ",pre_processed)
311
 
312
  except Exception as e:
313
  logging.exception("Error occurred while formatting and cleaning data" +" Info: " + str(e))
314
- raise e
315
  exit()
316
 
317
  return pre_processed
 
23
  nltk.download('punkt')
24
  nltk.download('stopwords')
25
  nltk.download('wordnet')
26
+ nltk.download('omw-1.4')
27
  import html
28
 
29
  from config import config_file
 
298
  def format_data(self,data:list)-> list:
299
  try:
300
  X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
 
301
  ### Add Bigrams and keep only the good ones(pre-selected)
302
  X_bigrmas = self.phrase_bigrams[X]
 
303
 
304
  data_clean = self.clean_bigram(X_bigrmas)
 
305
  X_bigrams_clean = ' '.join(map(str, data_clean))
 
306
  pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
 
307
 
308
  except Exception as e:
309
  logging.exception("Error occurred while formatting and cleaning data" +" Info: " + str(e))
 
310
  exit()
311
 
312
  return pre_processed