ierhon committed
Commit 41c4775 · 1 Parent(s): 4ab880f

Fix vocab_size to +1

Files changed (1)
  1. train.py +1 -1
train.py CHANGED
@@ -15,7 +15,7 @@ dset_size = len(dset)
 tokenizer = Tokenizer() # a tokenizer is a thing to split text into words, it might have some other stuff like making all the letters lowercase, etc.
 tokenizer.fit_on_texts(list(dset.keys()))
 
-vocab_size = len(tokenizer.word_index)
+vocab_size = len(tokenizer.word_index) + 1
 
 model = Sequential()
 model.add(Embedding(input_dim=vocab_size, output_dim=emb_size, input_length=inp_len))
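The off-by-one matters because Keras' Tokenizer numbers words starting at 1 (index 0 is left free, conventionally for padding), so the largest token index equals len(tokenizer.word_index), and Embedding only accepts indices 0 .. input_dim-1. A minimal standalone sketch of the same fix, not part of this repo (the example texts, output_dim, and input_length values are placeholders standing in for the script's own dset, emb_size, and inp_len):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()
tokenizer.fit_on_texts(["hello world", "hello there"])
print(tokenizer.word_index)  # {'hello': 1, 'world': 2, 'there': 3} -- indices start at 1

# Without the +1, input_dim would be 3, and the word indexed 3 ('there')
# would fall outside the embedding's valid range 0..2.
vocab_size = len(tokenizer.word_index) + 1  # 4 -> valid indices 0..3

model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=8, input_length=5))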