Use new dataset.json
Browse files
train.py
CHANGED
@@ -8,10 +8,12 @@ from keras_self_attention import SeqSelfAttention, SeqWeightedAttention
|
|
8 |
from model_settings import *
|
9 |
|
10 |
|
11 |
-
with open("dataset.json", "r") as f:
|
12 |
dset = json.load(f)
|
13 |
|
14 |
-
|
|
|
|
|
15 |
tokenizer = Tokenizer() # a tokenizer is a thing to split text into words, it might have some other stuff like making all the letters lowercase, etc.
|
16 |
tokenizer.fit_on_texts(list(dset.keys()))
|
17 |
|
@@ -31,11 +33,11 @@ model.add(Dense(dset_size, activation="linear")) # TBH it doesn't matter that mu
|
|
31 |
X = [] # we're loading the training data into input X
|
32 |
y = [] # and output y
|
33 |
|
34 |
-
for
|
35 |
tokens = tokenizer.texts_to_sequences([key,])[0]
|
36 |
X.append(np.array((list(tokens)+[0,]*inp_len)[:inp_len])) # refusing to use pad_sequences for an unspecified reason and creating the worst line of code
|
37 |
output_array = np.zeros(dset_size)
|
38 |
-
output_array[
|
39 |
y.append(output_array)
|
40 |
|
41 |
X = np.array(X) # normal lists are way slower than numpy arrays (remember, a list and an array is not the same thing, an array is far more limited)
|
|
|
from model_settings import *

# dataset.json: keys are the input texts fed to the tokenizer; values are
# presumably indices of the matching response line — confirm against the
# one-hot construction further down in this file.
with open("dataset.json", "r") as f:
    dset = json.load(f)

# One output neuron per line of responses.txt, so the output layer size is
# the number of canned responses. Count lines without keeping them all in
# memory (readlines() would build the full list just to take its length).
with open("responses.txt", "r") as f:
    dset_size = sum(1 for _ in f)

tokenizer = Tokenizer()  # a tokenizer is a thing to split text into words, it might have some other stuff like making all the letters lowercase, etc.
tokenizer.fit_on_texts(list(dset.keys()))
|
|
X = []  # we're loading the training data into input X
y = []  # and output y

for key in dset:
    # texts_to_sequences takes/returns a batch, so wrap the single text in a
    # one-element list and take the first (only) result.
    tokens = tokenizer.texts_to_sequences([key])[0]
    # Right-pad with zeros (or truncate) to a fixed length of inp_len; a
    # negative repeat count yields an empty list, so this also handles
    # sequences longer than inp_len.
    padded = tokens[:inp_len] + [0] * (inp_len - len(tokens))
    X.append(np.array(padded))
    # One-hot target vector over the response lines.
    output_array = np.zeros(dset_size)
    output_array[dset[key]] = 1  # 0 0 0 1 0 0 0 0 0, the neuron of the each line activates in the correct response
    y.append(output_array)

X = np.array(X)  # normal lists are way slower than numpy arrays (remember, a list and an array is not the same thing, an array is far more limited)