ando55 committed on
Commit
9b52d14
·
1 Parent(s): 1c9e7ad

Delete nltk_utils.py

Browse files
Files changed (1) hide show
  1. nltk_utils.py +0 -27
nltk_utils.py DELETED
@@ -1,27 +0,0 @@
1
- import nltk
2
- import numpy as np
3
- #nltk.download('all')
4
- from nltk.stem.porter import PorterStemmer
5
- stemmer = PorterStemmer()
6
def tokenize(sentence):
    """
    Split a sentence into a list of tokens.

    A token can be a word, a punctuation character, or a number. The work
    is delegated entirely to ``nltk.word_tokenize``.

    NOTE(review): ``nltk.word_tokenize`` presumably needs NLTK's tokenizer
    data to be installed (the file carries a commented-out
    ``nltk.download('all')``) -- confirm for the deployment environment.

    :param sentence: the text to split.
    :return: whatever ``nltk.word_tokenize`` returns for ``sentence``.
    """
    return nltk.word_tokenize(sentence)
12
-
13
def stem(word):
    """
    Return the stem of ``word``.

    The word is lower-cased first and then passed to the module-level
    ``stemmer`` (a ``PorterStemmer`` instance).

    :param word: a single token; must support ``.lower()``.
    :return: the result of ``stemmer.stem`` on the lower-cased word.
    """
    return stemmer.stem(word.lower())
16
-
17
def bag_of_words(tokenized_sentence, all_words):
    """
    Build a bag-of-words vector for a tokenized sentence.

    Every token is stemmed with ``stem``; position ``i`` of the result is
    1.0 when ``all_words[i]`` equals one of the stemmed tokens and 0.0
    otherwise. Entries of ``all_words`` are compared as-is (they are not
    stemmed here), so they only match when they equal a stemmed form.

    :param tokenized_sentence: iterable of tokens from one sentence.
    :param all_words: sequence of vocabulary entries; its length fixes the
        length of the returned vector.
    :return: 1-D ``numpy`` array of dtype ``float32`` with
        ``len(all_words)`` elements.
    """
    # Stem once and keep the stems in a set so each vocabulary lookup is a
    # constant-time membership test rather than a scan of the sentence.
    stemmed_tokens = {stem(w) for w in tokenized_sentence}

    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in stemmed_tokens:
            bag[idx] = 1.0
    return bag
26
-
27
-