Fix vocab_size to +1
train.py
CHANGED
@@ -15,7 +15,7 @@ dset_size = len(dset)
 tokenizer = Tokenizer() # a tokenizer is a thing to split text into words, it might have some other stuff like making all the letters lowercase, etc.
 tokenizer.fit_on_texts(list(dset.keys()))

-vocab_size = len(tokenizer.word_index)
+vocab_size = len(tokenizer.word_index) + 1

 model = Sequential()
 model.add(Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len))
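For context on the fix: Keras' `Tokenizer` numbers words starting at 1, reserving index 0 (commonly used for padding), so the largest index it can emit equals `len(tokenizer.word_index)`. Since an `Embedding` layer's `input_dim` must be strictly greater than any index it receives, `vocab_size` needs the `+ 1`. A minimal sketch, assuming TensorFlow/Keras and made-up example texts (the real code fits on `dset.keys()`):

```python
from tensorflow.keras.preprocessing.text import Tokenizer

# Made-up texts for illustration only.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(["hello world", "hello there"])

print(tokenizer.word_index)       # {'hello': 1, 'world': 2, 'there': 3} -- indices start at 1
print(len(tokenizer.word_index))  # 3, yet the largest index is also 3

# With input_dim=3 the embedding table only has rows 0..2, so index 3
# ('there') would be out of range; input_dim must be len(word_index) + 1.
vocab_size = len(tokenizer.word_index) + 1  # 4: covers indices 0..3
```

The lookup error only surfaces when the highest-indexed word actually appears in a batch, which is why this off-by-one is easy to miss.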