import nltk
import random
import numpy as np
import json
import pickle
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD
# Download the NLTK data that word_tokenize and the lemmatizer depend on
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

lemmatizer = WordNetLemmatizer()
# Load the intents file
with open('intents.json') as json_file:
    intents = json.load(json_file)
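# The script assumes an intents.json shaped roughly like the illustrative
# example below (the "responses" key is typical for such files but is not
# used by this training script):
# {
#   "intents": [
#     {"tag": "greeting",
#      "patterns": ["Hi", "Hello", "How are you?"],
#      "responses": ["Hello!", "Hi there!"]}
#   ]
# }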
# Initialize lists
words = []
classes = []
documents = []
ignore_words = ['?', '!']
# Process the intents
for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(list(set(words)))
classes = sorted(list(set(classes)))
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))
training = []
output_empty = [0] * len(classes)
# Debugging: Print lengths of words and classes
print(f'Number of words: {len(words)}')
print(f'Number of classes: {len(classes)}')
for doc in documents:
    # Build a bag-of-words vector for this document and a one-hot output row
    bag = []
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
    for word in words:
        bag.append(1 if word in pattern_words else 0)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])
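# Illustrative example (hypothetical vocabulary, not derived from the real
# intents file): with words = ['hello', 'hi', 'how'] and classes =
# ['goodbye', 'greeting'], the pattern "Hi" would yield bag = [0, 1, 0]
# and output_row = [0, 1].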
# Debugging: Check for inconsistencies in training data
for i, t in enumerate(training):
    if len(t[0]) != len(words):
        print(f'Inconsistent length in training data at index {i}: {len(t[0])} != {len(words)}')
random.shuffle(training)
training = np.array(training, dtype=object)
# Debugging: Print shape of training data
print(f'Training data shape: {training.shape}')
train_x = list(training[:, 0])
train_y = list(training[:, 1])
# Debugging: Print shapes of train_x and train_y
print(f'Shape of train_x: {np.array(train_x).shape}')
print(f'Shape of train_y: {np.array(train_y).shape}')
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))
# Note: the `decay` argument is only accepted by the legacy Keras optimizers;
# on TensorFlow 2.11+ use tf.keras.optimizers.legacy.SGD or a learning-rate
# schedule instead.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
model.save('chatbotmodel.h5')  # save() does not take the training History object as an argument
print("Model trained and saved.")