bohraanuj23 committed on
Commit
6aad03c
·
verified ·
1 Parent(s): 6d59867

Delete chatbot.ipynb

Browse files
Files changed (1) hide show
  1. chatbot.ipynb +0 -357
chatbot.ipynb DELETED
@@ -1,357 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import numpy as np\n",
10
- "import pandas as pd\n"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": null,
16
- "metadata": {},
17
- "outputs": [],
18
- "source": [
19
- "import random\n",
20
- "import json\n",
21
- "\n",
22
- "import torch\n",
23
- "import torch.nn as nn\n",
24
- "import nltk\n",
25
- "nltk.download('punkt')\n",
26
- "\n",
27
- "from torch.utils.data import Dataset, DataLoader\n",
28
- "\n",
29
- "import numpy as np\n",
30
- "from nltk.stem.porter import PorterStemmer\n",
31
- "stemmer = PorterStemmer()"
32
- ]
33
- },
34
- {
35
- "cell_type": "code",
36
- "execution_count": null,
37
- "metadata": {},
38
- "outputs": [],
39
- "source": [
40
- "class NeuralNet(nn.Module):\n",
41
- " def __init__(self, input_size, hidden_size, num_classes):\n",
42
- " super(NeuralNet, self).__init__()\n",
43
- " self.l1 = nn.Linear(input_size, hidden_size) \n",
44
- " self.l2 = nn.Linear(hidden_size, hidden_size) \n",
45
- " self.l3 = nn.Linear(hidden_size, num_classes)\n",
46
- " self.relu = nn.ReLU()\n",
47
- " \n",
48
- " def forward(self, x):\n",
49
- " out = self.l1(x)\n",
50
- " out = self.relu(out)\n",
51
- " out = self.l2(out)\n",
52
- " out = self.relu(out)\n",
53
- " out = self.l3(out)\n",
54
- " return out"
55
- ]
56
- },
57
- {
58
- "cell_type": "code",
59
- "execution_count": null,
60
- "metadata": {},
61
- "outputs": [],
62
- "source": [
63
- "import json\n",
64
- "\n",
65
- "\n",
66
- "intents_file_path = 'data\\intents.json'\n",
67
- "\n",
68
- "\n",
69
- "with open(intents_file_path, 'r') as f:\n",
70
- " intents = json.load(f)\n",
71
- "\n",
72
- "\n",
73
- "print(intents)\n"
74
- ]
75
- },
76
- {
77
- "cell_type": "code",
78
- "execution_count": null,
79
- "metadata": {},
80
- "outputs": [],
81
- "source": [
82
- "def tokenize(sentence):\n",
83
- " return nltk.word_tokenize(sentence)\n",
84
- "\n",
85
- "\n",
86
- "def stem(word):\n",
87
- " return stemmer.stem(word.lower())\n",
88
- "\n",
89
- "\n",
90
- "def bag_of_words(tokenized_sentence, words):\n",
91
- " sentence_words = [stem(word) for word in tokenized_sentence]\n",
92
- " bag = np.zeros(len(words), dtype=np.float32)\n",
93
- " for idx, w in enumerate(words):\n",
94
- " if w in sentence_words: \n",
95
- " bag[idx] = 1\n",
96
- "\n",
97
- " return bag"
98
- ]
99
- },
100
- {
101
- "cell_type": "code",
102
- "execution_count": null,
103
- "metadata": {},
104
- "outputs": [],
105
- "source": [
106
- "all_words = []\n",
107
- "tags = []\n",
108
- "xy = []\n",
109
- "for intent in intents['intents']:\n",
110
- " tag = intent['tag']\n",
111
- " tags.append(tag)\n",
112
- " for pattern in intent['patterns']:\n",
113
- " w = tokenize(pattern)\n",
114
- " all_words.extend(w)\n",
115
- " xy.append((w, tag))\n",
116
- "\n",
117
- "ignore_words = ['?', '.', '!']\n",
118
- "all_words = [stem(w) for w in all_words if w not in ignore_words]\n",
119
- "all_words = sorted(set(all_words))\n",
120
- "tags = sorted(set(tags))\n",
121
- "\n",
122
- "print(len(xy), \"patterns\")\n",
123
- "print(len(tags), \"tags:\", tags)\n",
124
- "print(len(all_words), \"unique stemmed words:\", all_words)"
125
- ]
126
- },
127
- {
128
- "cell_type": "code",
129
- "execution_count": null,
130
- "metadata": {},
131
- "outputs": [],
132
- "source": [
133
- "X_train = []\n",
134
- "y_train = []\n",
135
- "for (pattern_sentence, tag) in xy:\n",
136
- " bag = bag_of_words(pattern_sentence, all_words)\n",
137
- " X_train.append(bag)\n",
138
- " label = tags.index(tag)\n",
139
- " y_train.append(label)\n",
140
- "\n",
141
- "X_train = np.array(X_train)\n",
142
- "y_train = np.array(y_train)"
143
- ]
144
- },
145
- {
146
- "cell_type": "code",
147
- "execution_count": null,
148
- "metadata": {},
149
- "outputs": [],
150
- "source": [
151
- "num_epochs = 1000\n",
152
- "batch_size = 8\n",
153
- "learning_rate = 0.001\n",
154
- "input_size = len(X_train[0])\n",
155
- "hidden_size = 8\n",
156
- "output_size = len(tags)\n",
157
- "print(input_size, output_size)"
158
- ]
159
- },
160
- {
161
- "cell_type": "code",
162
- "execution_count": null,
163
- "metadata": {},
164
- "outputs": [],
165
- "source": [
166
- "class ChatDataset(Dataset):\n",
167
- "\n",
168
- " def __init__(self):\n",
169
- " self.n_samples = len(X_train)\n",
170
- " self.x_data = X_train\n",
171
- " self.y_data = y_train\n",
172
- "\n",
173
- " def __getitem__(self, index):\n",
174
- " return self.x_data[index], self.y_data[index]\n",
175
- " def __len__(self):\n",
176
- " return self.n_samples"
177
- ]
178
- },
179
- {
180
- "cell_type": "code",
181
- "execution_count": null,
182
- "metadata": {},
183
- "outputs": [],
184
- "source": [
185
- "dataset = ChatDataset()\n",
186
- "train_loader = DataLoader(dataset=dataset,\n",
187
- " batch_size=batch_size,\n",
188
- " shuffle=True,\n",
189
- " num_workers=0)\n",
190
- "\n",
191
- "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
192
- "\n",
193
- "model = NeuralNet(input_size, hidden_size, output_size).to(device)"
194
- ]
195
- },
196
- {
197
- "cell_type": "code",
198
- "execution_count": null,
199
- "metadata": {},
200
- "outputs": [],
201
- "source": [
202
- "model"
203
- ]
204
- },
205
- {
206
- "cell_type": "code",
207
- "execution_count": null,
208
- "metadata": {},
209
- "outputs": [],
210
- "source": [
211
- "dataset"
212
- ]
213
- },
214
- {
215
- "cell_type": "code",
216
- "execution_count": null,
217
- "metadata": {},
218
- "outputs": [],
219
- "source": [
220
- "criterion = nn.CrossEntropyLoss()\n",
221
- "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)\n",
222
- "\n",
223
- "for epoch in range(num_epochs):\n",
224
- " for (words, labels) in train_loader:\n",
225
- " words = words.to(device)\n",
226
- " labels = labels.to(dtype=torch.long).to(device)\n",
227
- "\n",
228
- " outputs = model(words)\n",
229
- " loss = criterion(outputs, labels)\n",
230
- " \n",
231
- " optimizer.zero_grad()\n",
232
- " loss.backward()\n",
233
- " optimizer.step()\n",
234
- " \n",
235
- " if (epoch+1) % 100 == 0:\n",
236
- " print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n",
237
- "\n",
238
- "\n",
239
- "print(f'final loss: {loss.item():.4f}')"
240
- ]
241
- },
242
- {
243
- "cell_type": "code",
244
- "execution_count": null,
245
- "metadata": {},
246
- "outputs": [],
247
- "source": [
248
- "data = {\n",
249
- "\"model_state\": model.state_dict(),\n",
250
- "\"input_size\": input_size,\n",
251
- "\"hidden_size\": hidden_size,\n",
252
- "\"output_size\": output_size,\n",
253
- "\"all_words\": all_words,\n",
254
- "\"tags\": tags\n",
255
- "}\n",
256
- "\n",
257
- "FILE = \"data.pth\"\n",
258
- "torch.save(data, FILE)\n",
259
- "\n",
260
- "print(f'training complete. file saved to {FILE}')"
261
- ]
262
- },
263
- {
264
- "cell_type": "code",
265
- "execution_count": 16,
266
- "metadata": {},
267
- "outputs": [
268
- {
269
- "ename": "KeyboardInterrupt",
270
- "evalue": "Interrupted by user",
271
- "output_type": "error",
272
- "traceback": [
273
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
274
- "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
275
- "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_7908\\3081624382.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Let's chat! (type 'quit' to exit)\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[0msentence\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0minput\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"You: \"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 23\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0msentence\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"quit\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 24\u001b[0m \u001b[1;32mbreak\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
276
- "\u001b[1;32mc:\\Users\\Anuj Bohra\\anaconda3\\envs\\anuj\\lib\\site-packages\\ipykernel\\kernelbase.py\u001b[0m in \u001b[0;36mraw_input\u001b[1;34m(self, prompt)\u001b[0m\n\u001b[0;32m 1179\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_parent_ident\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"shell\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1180\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_parent\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"shell\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1181\u001b[1;33m \u001b[0mpassword\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1182\u001b[0m )\n\u001b[0;32m 1183\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
277
- "\u001b[1;32mc:\\Users\\Anuj Bohra\\anaconda3\\envs\\anuj\\lib\\site-packages\\ipykernel\\kernelbase.py\u001b[0m in \u001b[0;36m_input_request\u001b[1;34m(self, prompt, ident, parent, password)\u001b[0m\n\u001b[0;32m 1217\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1218\u001b[0m \u001b[1;31m# re-raise KeyboardInterrupt, to truncate traceback\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1219\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Interrupted by user\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1220\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1221\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwarning\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Invalid Message:\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexc_info\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
278
- "\u001b[1;31mKeyboardInterrupt\u001b[0m: Interrupted by user"
279
- ]
280
- }
281
- ],
282
- "source": [
283
- "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
284
- "\n",
285
- "\n",
286
- "\n",
287
- "FILE = \"data.pth\"\n",
288
- "data = torch.load(FILE)\n",
289
- "\n",
290
- "input_size = data[\"input_size\"]\n",
291
- "hidden_size = data[\"hidden_size\"]\n",
292
- "output_size = data[\"output_size\"]\n",
293
- "all_words = data['all_words']\n",
294
- "tags = data['tags']\n",
295
- "model_state = data[\"model_state\"]\n",
296
- "\n",
297
- "model = NeuralNet(input_size, hidden_size, output_size).to(device)\n",
298
- "model.load_state_dict(model_state)\n",
299
- "model.eval()\n",
300
- "\n",
301
- "bot_name = \"Medical ChatBot\"\n",
302
- "print(\"Let's chat! (type 'quit' to exit)\")\n",
303
- "while True:\n",
304
- " sentence = input(\"You: \")\n",
305
- " if sentence == \"quit\":\n",
306
- " break\n",
307
- "\n",
308
- " sentence = tokenize(sentence)\n",
309
- " X = bag_of_words(sentence, all_words)\n",
310
- " X = X.reshape(1, X.shape[0])\n",
311
- " X = torch.from_numpy(X).to(device)\n",
312
- "\n",
313
- " output = model(X)\n",
314
- " _, predicted = torch.max(output, dim=1)\n",
315
- "\n",
316
- " tag = tags[predicted.item()]\n",
317
- "\n",
318
- " probs = torch.softmax(output, dim=1)\n",
319
- " prob = probs[0][predicted.item()]\n",
320
- " if prob.item() > 0.75:\n",
321
- " for intent in intents['intents']:\n",
322
- " if tag == intent[\"tag\"]:\n",
323
- " print(f\"{bot_name}: {random.choice(intent['responses'])}\")\n",
324
- " else:\n",
325
- " print(f\"{bot_name}: I do not understand...\")"
326
- ]
327
- },
328
- {
329
- "cell_type": "code",
330
- "execution_count": null,
331
- "metadata": {},
332
- "outputs": [],
333
- "source": []
334
- }
335
- ],
336
- "metadata": {
337
- "kernelspec": {
338
- "display_name": "anuj",
339
- "language": "python",
340
- "name": "python3"
341
- },
342
- "language_info": {
343
- "codemirror_mode": {
344
- "name": "ipython",
345
- "version": 3
346
- },
347
- "file_extension": ".py",
348
- "mimetype": "text/x-python",
349
- "name": "python",
350
- "nbconvert_exporter": "python",
351
- "pygments_lexer": "ipython3",
352
- "version": "3.7.16"
353
- }
354
- },
355
- "nbformat": 4,
356
- "nbformat_minor": 2
357
- }