ierhon committed on
Commit 84f4971 · 1 Parent(s): 99918f8

Update train.py


Fix tokenizer fitting: learn the vocabulary from the dataset's input texts

Files changed (1)
  1. train.py +1 -1
train.py CHANGED

@@ -10,7 +10,7 @@ with open("dataset.json", "r") as f:
     dset = json.load(f)
 
 tokenizer = Tokenizer()
-tokenizer.fit_on_texts(dset)
+tokenizer.fit_on_texts(list(dset.keys()))
 
 emb_size = 128 # how big are the word vectors in the input (how much information can be fit into one word)
 vocab_size = len(tokenizer.get_vocabulary())
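
For context, a minimal self-contained sketch of what the fixed call does. The repo's real Tokenizer class is not shown in this diff, so the one below is a stand-in exposing only the two methods the diff uses (fit_on_texts, get_vocabulary); the assumed shape of dataset.json is a JSON object whose keys are the input texts, which is what list(dset.keys()) implies:

```python
import json

class Tokenizer:
    """Stand-in for the repo's Tokenizer; only fit_on_texts and
    get_vocabulary appear in the diff, so the real class may differ."""
    def __init__(self):
        self.vocab = []

    def fit_on_texts(self, texts):
        # Build a word-level vocabulary from an iterable of strings.
        seen = {}
        for text in texts:
            for word in text.split():
                seen.setdefault(word, len(seen))
        self.vocab = list(seen)

    def get_vocabulary(self):
        return self.vocab

with open("dataset.json", "r") as f:
    dset = json.load(f)  # assumed shape: {"input text": "response", ...}

tokenizer = Tokenizer()

# Before the fix the dict itself was passed: tokenizer.fit_on_texts(dset).
# Passing the keys explicitly fits the vocabulary on exactly the input texts.
tokenizer.fit_on_texts(list(dset.keys()))

vocab_size = len(tokenizer.get_vocabulary())
print(vocab_size)
```

Note that iterating a dict in Python yields its keys, so the old call may have worked by accident when fit_on_texts simply loops over its argument; list(dset.keys()) makes the intent explicit and guarantees the tokenizer receives a list of strings.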