Spaces:
Runtime error
Runtime error
hopefully nlp fix
Browse files
app.py
CHANGED
@@ -19,7 +19,7 @@ def sent_to_words(sentences):
|
|
19 |
for sentence in sentences:
|
20 |
yield(gensim.utils.simple_preprocess(str(sentence), deacc=True)) # deacc=True removes punctuations
|
21 |
|
22 |
-
def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']): #'NOUN', 'ADJ', 'VERB', 'ADV'
|
23 |
texts_out = []
|
24 |
for sent in texts:
|
25 |
doc = nlp(" ".join(sent))
|
@@ -46,9 +46,9 @@ def main(choose_context):
|
|
46 |
print('downloading en_core_web_sm')
|
47 |
os.system("python -m spacy download en_core_web_sm")
|
48 |
print('en_core_web_sm downloaded')
|
49 |
-
|
50 |
nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])
|
51 |
-
data_lemmatized = lemmatization(data_words, allowed_postags=["NOUN", "ADJ"]) #select noun and verb
|
52 |
|
53 |
vectorizer = CountVectorizer(
|
54 |
analyzer='word',
|
|
|
19 |
for sentence in sentences:
|
20 |
yield(gensim.utils.simple_preprocess(str(sentence), deacc=True)) # deacc=True removes punctuations
|
21 |
|
22 |
+
def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'], nlp=None): #'NOUN', 'ADJ', 'VERB', 'ADV'
|
23 |
texts_out = []
|
24 |
for sent in texts:
|
25 |
doc = nlp(" ".join(sent))
|
|
|
46 |
print('downloading en_core_web_sm')
|
47 |
os.system("python -m spacy download en_core_web_sm")
|
48 |
print('en_core_web_sm downloaded')
|
49 |
+
|
50 |
nlp = spacy.load("en_core_web_sm", disable=["parser", "ner"])
|
51 |
+
data_lemmatized = lemmatization(data_words, allowed_postags=["NOUN", "ADJ"], nlp=nlp) #select noun and verb
|
52 |
|
53 |
vectorizer = CountVectorizer(
|
54 |
analyzer='word',
|