Spaces:
Runtime error
Runtime error
File size: 666 Bytes
926183f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
import nltk
import numpy as np
# One-time setup: uncomment to download the NLTK data that word_tokenize relies on.
# nltk.download('all')
from nltk.stem.porter import PorterStemmer
# Single module-level Porter stemmer instance, reused by stem() on every call.
stemmer = PorterStemmer()
def tokenize(sentence):
    """Break a sentence into a list of tokens.

    Delegates to ``nltk.word_tokenize``; a token can be a word, a
    punctuation character, or a number.
    """
    tokens = nltk.word_tokenize(sentence)
    return tokens
def stem(word):
    """Return the Porter stem of ``word`` after lower-casing it."""
    lowered = word.lower()
    return stemmer.stem(lowered)
def bag_of_words(tokenized_sentence, all_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Every token is passed through ``stem`` and the stemmed forms are
    compared against the vocabulary.

    Args:
        tokenized_sentence: Iterable of raw (unstemmed) tokens.
        all_words: Sequence of vocabulary entries. Presumably these are
            already stemmed the same way -- confirm against the caller.

    Returns:
        A ``float32`` numpy array of length ``len(all_words)`` holding 1.0
        at each position whose vocabulary entry occurs among the stemmed
        tokens and 0.0 everywhere else.
    """
    # Build the set once: membership tests are then O(1) per vocabulary
    # word instead of a linear scan over the sentence for each one.
    stemmed_tokens = {stem(w) for w in tokenized_sentence}
    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in stemmed_tokens:
            bag[idx] = 1.0
    return bag
|