Update pipeline.py
Browse files- pipeline.py +6 -1
pipeline.py
CHANGED
@@ -297,12 +297,17 @@ class Branch_Classifier(object):
|
|
297 |
def format_data(self,data:list)-> list:
|
298 |
try:
|
299 |
X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
|
300 |
-
|
301 |
### Add Bigrams and keep only the good ones(pre-selected)
|
302 |
X_bigrmas = self.phrase_bigrams[X]
|
|
|
|
|
303 |
data_clean = self.clean_bigram(X_bigrmas)
|
|
|
304 |
X_bigrams_clean = ' '.join(map(str, data_clean))
|
|
|
305 |
pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
|
|
|
306 |
|
307 |
except Exception as e:
|
308 |
logging.exception("Error occurred while formatting and cleaning data" +" Info: " + str(e))
|
|
|
297 |
def format_data(self,data:list)-> list:
|
298 |
try:
|
299 |
X = text_cleaning.text_cleaning(data, steam=False, lemma=True,single_input=True)[0]
|
300 |
+
print("\n1: ",X)
|
301 |
### Add Bigrams and keep only the good ones(pre-selected)
|
302 |
X_bigrmas = self.phrase_bigrams[X]
|
303 |
+
print("\n2: ",X_bigrmas)
|
304 |
+
|
305 |
data_clean = self.clean_bigram(X_bigrmas)
|
306 |
+
print("\n3: ",data_clean)
|
307 |
X_bigrams_clean = ' '.join(map(str, data_clean))
|
308 |
+
print("\n4: ",X_bigrams_clean)
|
309 |
pre_processed = self.vectorizer.transform([X_bigrams_clean]).toarray(),X_bigrams_clean
|
310 |
+
print("\n5: ",pre_processed)
|
311 |
|
312 |
except Exception as e:
|
313 |
logging.exception("Error occurred while formatting and cleaning data" +" Info: " + str(e))
|