Spaces:
Runtime error
Runtime error
Delete nltk_utils.py
Browse files- nltk_utils.py +0 -27
nltk_utils.py
DELETED
@@ -1,27 +0,0 @@
|
|
1 |
-
import nltk
|
2 |
-
import numpy as np
|
3 |
-
#nltk.download('all')
|
4 |
-
from nltk.stem.porter import PorterStemmer
|
5 |
-
# Module-level Porter stemmer instance; stem() calls its .stem() method.
stemmer = PorterStemmer()
|
6 |
-
def tokenize(sentence):
    """Break a sentence into a sequence of tokens.

    A token may be a word, a punctuation character, or a number.
    The splitting itself is delegated to ``nltk.word_tokenize``.
    """
    tokens = nltk.word_tokenize(sentence)
    return tokens
|
12 |
-
|
13 |
-
def stem(word):
    """Return the stem of ``word``.

    The word is lower-cased first and then passed to the module-level
    ``stemmer`` instance.
    """
    lowered = word.lower()
    return stemmer.stem(lowered)
|
16 |
-
|
17 |
-
def bag_of_words(tokenized_sentence, all_words):
    """Build a bag-of-words vector for a tokenized sentence.

    Args:
        tokenized_sentence: iterable of tokens; each one is passed
            through ``stem`` before comparison.
        all_words: sequence of vocabulary entries. Entries are compared
            as-is (they are not stemmed here), so they are presumably
            already stemmed by the caller -- TODO confirm.

    Returns:
        A ``float32`` NumPy array of length ``len(all_words)`` holding
        1.0 at every index whose vocabulary entry appears among the
        stemmed tokens, and 0.0 elsewhere.
    """
    # Stem once and keep the results in a set so that each vocabulary
    # lookup below is a hash lookup instead of a scan over the sentence.
    stemmed_tokens = {stem(w) for w in tokenized_sentence}

    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in stemmed_tokens:
            bag[idx] = 1.0
    return bag
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|