Update utils.py
utils.py
@@ -57,7 +57,7 @@ def normalize(text, segment=True):
     text = replace_all(text, dict_map)
     if segment:
         text = text.split(".")
-        text = ". ".join([underthesea.word_tokenize(i, format="text") for i in text
+        text = ". ".join([underthesea.word_tokenize(i, format="text") for i in text])
     return text
 def text_preprocess(document):
     punc = [i for i in ["\"", "-", ".", ":"]]#string.punctuation.replace(",","")]
@@ -84,7 +84,7 @@ def text_preprocess(document):
     document = re.sub(" ", " ", document)
     try:
         document = document.split(".")
-        document = ". ".join([underthesea.word_tokenize(i, format="text") for i in document
+        document = ". ".join([underthesea.word_tokenize(i, format="text") for i in document])
     except:
         pass
     return document.lower()
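For context, the bug fixed in both hunks is the same: the list comprehension was missing its closing "])", so utils.py failed to import with a SyntaxError. Below is a minimal, self-contained sketch of the corrected segmentation step, assuming the underthesea package is installed; the replace_all/dict_map character-mapping step from the full utils.py is omitted because it is unrelated to this fix.

# Minimal sketch of the corrected segmentation step (not the full utils.py).
# Assumes underthesea is installed: pip install underthesea
import underthesea

def normalize(text, segment=True):
    if segment:
        sentences = text.split(".")
        # The fix: the comprehension is now closed with "])", so each
        # sentence is word-tokenized and the pieces rejoined with ". ".
        text = ". ".join([underthesea.word_tokenize(s, format="text")
                          for s in sentences])
    return text

# With format="text", multi-syllable Vietnamese words come back joined
# with underscores, e.g. "sinh viên" -> "sinh_viên".
print(normalize("Tôi là sinh viên. Tôi học tại Hà Nội."))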