DanielSc4 committed
Commit cb9a228 · 1 Parent(s): 952088c

hopefully nlp fix

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -19,7 +19,7 @@ def sent_to_words(sentences):
     for sentence in sentences:
         yield(gensim.utils.simple_preprocess(str(sentence), deacc=True)) # deacc=True removes punctuations
 
-def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']): #'NOUN', 'ADJ', 'VERB', 'ADV'
+def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'], nlp=None): #'NOUN', 'ADJ', 'VERB', 'ADV'
     texts_out = []
     for sent in texts:
         doc = nlp(" ".join(sent))
@@ -46,9 +46,9 @@ def main(choose_context):
     print('downloading en_core_web_sm')
     os.system("python -m spacy download en_core_web_sm")
     print('en_core_web_sm downloaded')
-
+
     nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])
-    data_lemmatized = lemmatization(data_words, allowed_postags=["NOUN", "ADJ"]) #select noun and verb
+    data_lemmatized = lemmatization(data_words, allowed_postags=["NOUN", "ADJ"], nlp=nlp) #select noun and verb
 
     vectorizer = CountVectorizer(
         analyzer='word',
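
For context on what the fix does: the commit threads the spaCy pipeline loaded in main() into lemmatization() explicitly, instead of relying on a module-level nlp that was not defined when the helper ran. A minimal sketch of that call pattern follows; the loop body after doc = nlp(" ".join(sent)) and the sample data_words are assumptions for illustration, not the repository's exact code.

import spacy

def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'], nlp=None):
    # The already-loaded pipeline is passed in explicitly so the helper does not
    # depend on a global `nlp` that may not exist yet.
    texts_out = []
    for sent in texts:
        doc = nlp(" ".join(sent))
        # Assumed body: keep only the lemmas whose POS tag is in allowed_postags.
        texts_out.append([tok.lemma_ for tok in doc if tok.pos_ in allowed_postags])
    return texts_out

nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])
data_words = [["the", "models", "were", "trained", "quickly"]]  # stand-in for sent_to_words() output
data_lemmatized = lemmatization(data_words, allowed_postags=["NOUN", "ADJ"], nlp=nlp)
print(data_lemmatized)  # e.g. [['model']]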