Update train.py
Browse files
Fix the tokenizer learning
train.py
CHANGED
@@ -10,7 +10,7 @@ with open("dataset.json", "r") as f:
|
|
10 |
dset = json.load(f)
|
11 |
|
12 |
tokenizer = Tokenizer()
|
13 |
-
tokenizer.fit_on_texts(dset)
|
14 |
|
15 |
emb_size = 128 # how big are the word vectors in the input (how much information can be fit into one word)
|
16 |
vocab_size = len(tokenizer.get_vocabulary())
|
|
|
10 |
dset = json.load(f)
|
11 |
|
12 |
tokenizer = Tokenizer()
|
13 |
+
tokenizer.fit_on_texts(list(dset.keys()))
|
14 |
|
15 |
emb_size = 128 # how big are the word vectors in the input (how much information can be fit into one word)
|
16 |
vocab_size = len(tokenizer.get_vocabulary())
|