Spaces:
Sleeping
Sleeping
Commit
·
fe6e4b5
1
Parent(s):
0b6fcd5
Upload 5 files
Browse files- SMSSpamCollection.txt +0 -0
- cnn_tumor_model.ipynb +444 -0
- rnn_smsspam_model.h5 +3 -0
- rnn_smsspam_model.ipynb +486 -0
- rnn_smsspam_tokenizer.pickle +3 -0
SMSSpamCollection.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
cnn_tumor_model.ipynb
ADDED
@@ -0,0 +1,444 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"### TUMOR DETECTION USING CNN - MODEL BUILDING"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"import numpy as np\n",
|
17 |
+
"import matplotlib.pyplot as plt\n",
|
18 |
+
"import cv2\n",
|
19 |
+
"import os\n",
|
20 |
+
"import tensorflow as tf\n",
|
21 |
+
"from PIL import Image\n",
|
22 |
+
"from sklearn.model_selection import train_test_split\n",
|
23 |
+
"from sklearn.metrics import classification_report\n",
|
24 |
+
"from tqdm import tqdm"
|
25 |
+
]
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"cell_type": "code",
|
29 |
+
"execution_count": 2,
|
30 |
+
"metadata": {},
|
31 |
+
"outputs": [],
|
32 |
+
"source": [
|
33 |
+
"image_dir = r'D:\\STUDY\\Sem3\\deeplearning\\Multitasking_app\\tumordata'\n",
|
34 |
+
"no_tumor_images = os.listdir(image_dir + '/no')\n",
|
35 |
+
"yes_tumor_images = os.listdir(image_dir + '/yes')\n"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": 3,
|
41 |
+
"metadata": {},
|
42 |
+
"outputs": [
|
43 |
+
{
|
44 |
+
"name": "stdout",
|
45 |
+
"output_type": "stream",
|
46 |
+
"text": [
|
47 |
+
"The length of NO Tumor images is 1500\n",
|
48 |
+
"The length of Tumor images is 1500\n"
|
49 |
+
]
|
50 |
+
}
|
51 |
+
],
|
52 |
+
"source": [
|
53 |
+
"print('The length of NO Tumor images is',len(no_tumor_images))\n",
|
54 |
+
"print('The length of Tumor images is',len(yes_tumor_images))"
|
55 |
+
]
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"cell_type": "code",
|
59 |
+
"execution_count": 4,
|
60 |
+
"metadata": {},
|
61 |
+
"outputs": [
|
62 |
+
{
|
63 |
+
"name": "stderr",
|
64 |
+
"output_type": "stream",
|
65 |
+
"text": [
|
66 |
+
"No Tumor: 1500it [00:21, 68.61it/s]\n",
|
67 |
+
"Tumor: 1500it [00:22, 67.90it/s]\n"
|
68 |
+
]
|
69 |
+
}
|
70 |
+
],
|
71 |
+
"source": [
|
72 |
+
"dataset=[]\n",
|
73 |
+
"label=[]\n",
|
74 |
+
"img_siz=(128,128)\n",
|
75 |
+
"\n",
|
76 |
+
"\n",
|
77 |
+
"for i , image_name in tqdm(enumerate(no_tumor_images),desc=\"No Tumor\"):\n",
|
78 |
+
" if(image_name.split('.')[1]=='jpg'):\n",
|
79 |
+
" image=cv2.imread(image_dir+'/no/'+image_name)\n",
|
80 |
+
" image=Image.fromarray(image,'RGB')\n",
|
81 |
+
" image=image.resize(img_siz)\n",
|
82 |
+
" dataset.append(np.array(image))\n",
|
83 |
+
" label.append(0)\n",
|
84 |
+
" \n",
|
85 |
+
" \n",
|
86 |
+
"for i ,image_name in tqdm(enumerate(yes_tumor_images),desc=\"Tumor\"):\n",
|
87 |
+
" if(image_name.split('.')[1]=='jpg'):\n",
|
88 |
+
" image=cv2.imread(image_dir+'/yes/'+image_name)\n",
|
89 |
+
" image=Image.fromarray(image,'RGB')\n",
|
90 |
+
" image=image.resize(img_siz)\n",
|
91 |
+
" dataset.append(np.array(image))\n",
|
92 |
+
" label.append(1)\n",
|
93 |
+
" \n",
|
94 |
+
" \n",
|
95 |
+
"dataset=np.array(dataset)\n",
|
96 |
+
"label = np.array(label)"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": 5,
|
102 |
+
"metadata": {},
|
103 |
+
"outputs": [
|
104 |
+
{
|
105 |
+
"name": "stdout",
|
106 |
+
"output_type": "stream",
|
107 |
+
"text": [
|
108 |
+
"Dataset Length: 3000\n",
|
109 |
+
"Label Length: 3000\n"
|
110 |
+
]
|
111 |
+
}
|
112 |
+
],
|
113 |
+
"source": [
|
114 |
+
"print('Dataset Length: ',len(dataset))\n",
|
115 |
+
"print('Label Length: ',len(label))"
|
116 |
+
]
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"cell_type": "code",
|
120 |
+
"execution_count": 6,
|
121 |
+
"metadata": {},
|
122 |
+
"outputs": [
|
123 |
+
{
|
124 |
+
"name": "stdout",
|
125 |
+
"output_type": "stream",
|
126 |
+
"text": [
|
127 |
+
"Train-Test Split\n",
|
128 |
+
"Normalising the Dataset.\n",
|
129 |
+
"\n"
|
130 |
+
]
|
131 |
+
}
|
132 |
+
],
|
133 |
+
"source": [
|
134 |
+
"# Split the dataset into training and test sets\n",
|
135 |
+
"print(\"Train-Test Split\")\n",
|
136 |
+
"x_train, x_test, y_train, y_test = train_test_split(dataset, label, test_size=0.2, random_state=42)\n",
|
137 |
+
"\n",
|
138 |
+
"# Normalizing the dataset\n",
|
139 |
+
"print(\"Normalising the Dataset.\\n\")\n",
|
140 |
+
"x_train = tf.keras.utils.normalize(x_train, axis=1)\n",
|
141 |
+
"x_test = tf.keras.utils.normalize(x_test, axis=1)"
|
142 |
+
]
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"cell_type": "code",
|
146 |
+
"execution_count": 7,
|
147 |
+
"metadata": {},
|
148 |
+
"outputs": [
|
149 |
+
{
|
150 |
+
"name": "stdout",
|
151 |
+
"output_type": "stream",
|
152 |
+
"text": [
|
153 |
+
"Model: \"sequential\"\n",
|
154 |
+
"_________________________________________________________________\n",
|
155 |
+
" Layer (type) Output Shape Param # \n",
|
156 |
+
"=================================================================\n",
|
157 |
+
" conv2d (Conv2D) (None, 126, 126, 32) 896 \n",
|
158 |
+
" \n",
|
159 |
+
" max_pooling2d (MaxPooling2 (None, 63, 63, 32) 0 \n",
|
160 |
+
" D) \n",
|
161 |
+
" \n",
|
162 |
+
" flatten (Flatten) (None, 127008) 0 \n",
|
163 |
+
" \n",
|
164 |
+
" dense (Dense) (None, 256) 32514304 \n",
|
165 |
+
" \n",
|
166 |
+
" dropout (Dropout) (None, 256) 0 \n",
|
167 |
+
" \n",
|
168 |
+
" dense_1 (Dense) (None, 512) 131584 \n",
|
169 |
+
" \n",
|
170 |
+
" dense_2 (Dense) (None, 1) 513 \n",
|
171 |
+
" \n",
|
172 |
+
"=================================================================\n",
|
173 |
+
"Total params: 32647297 (124.54 MB)\n",
|
174 |
+
"Trainable params: 32647297 (124.54 MB)\n",
|
175 |
+
"Non-trainable params: 0 (0.00 Byte)\n",
|
176 |
+
"_________________________________________________________________\n"
|
177 |
+
]
|
178 |
+
}
|
179 |
+
],
|
180 |
+
"source": [
|
181 |
+
"# Define the model\n",
|
182 |
+
"model = tf.keras.models.Sequential([\n",
|
183 |
+
" tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),\n",
|
184 |
+
" tf.keras.layers.MaxPooling2D((2, 2)),\n",
|
185 |
+
" tf.keras.layers.Flatten(),\n",
|
186 |
+
" tf.keras.layers.Dense(256, activation='relu'),\n",
|
187 |
+
" tf.keras.layers.Dropout(0.5),\n",
|
188 |
+
" tf.keras.layers.Dense(512, activation='relu'),\n",
|
189 |
+
" tf.keras.layers.Dense(1, activation='sigmoid')\n",
|
190 |
+
"])\n",
|
191 |
+
"model.summary()"
|
192 |
+
]
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"cell_type": "code",
|
196 |
+
"execution_count": 8,
|
197 |
+
"metadata": {},
|
198 |
+
"outputs": [
|
199 |
+
{
|
200 |
+
"name": "stdout",
|
201 |
+
"output_type": "stream",
|
202 |
+
"text": [
|
203 |
+
"Training Started.\n",
|
204 |
+
"\n",
|
205 |
+
"Epoch 1/5\n",
|
206 |
+
"17/17 [==============================] - 19s 1s/step - loss: 0.7715 - accuracy: 0.6310 - val_loss: 0.5245 - val_accuracy: 0.7458\n",
|
207 |
+
"Epoch 2/5\n",
|
208 |
+
"17/17 [==============================] - 17s 999ms/step - loss: 0.4956 - accuracy: 0.7745 - val_loss: 0.4522 - val_accuracy: 0.7750\n",
|
209 |
+
"Epoch 3/5\n",
|
210 |
+
"17/17 [==============================] - 16s 964ms/step - loss: 0.3770 - accuracy: 0.8394 - val_loss: 0.3363 - val_accuracy: 0.8625\n",
|
211 |
+
"Epoch 4/5\n",
|
212 |
+
"17/17 [==============================] - 17s 988ms/step - loss: 0.2672 - accuracy: 0.8894 - val_loss: 0.3017 - val_accuracy: 0.8750\n",
|
213 |
+
"Epoch 5/5\n",
|
214 |
+
"17/17 [==============================] - 16s 960ms/step - loss: 0.1681 - accuracy: 0.9398 - val_loss: 0.1631 - val_accuracy: 0.9417\n",
|
215 |
+
"Training Finished.\n",
|
216 |
+
"\n"
|
217 |
+
]
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"name": "stderr",
|
221 |
+
"output_type": "stream",
|
222 |
+
"text": [
|
223 |
+
"d:\\STUDY\\Sem3\\deeplearning\\DLENV\\lib\\site-packages\\keras\\src\\engine\\training.py:3079: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n",
|
224 |
+
" saving_api.save_model(\n"
|
225 |
+
]
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"name": "stdout",
|
229 |
+
"output_type": "stream",
|
230 |
+
"text": [
|
231 |
+
"Model Saved.\n",
|
232 |
+
"\n"
|
233 |
+
]
|
234 |
+
}
|
235 |
+
],
|
236 |
+
"source": [
|
237 |
+
"# Compile the model\n",
|
238 |
+
"model.compile(optimizer='adam',\n",
|
239 |
+
" loss='binary_crossentropy',\n",
|
240 |
+
" metrics=['accuracy'])\n",
|
241 |
+
"\n",
|
242 |
+
"# Train the model\n",
|
243 |
+
"print(\"Training Started.\\n\")\n",
|
244 |
+
"history = model.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.1)\n",
|
245 |
+
"print(\"Training Finished.\\n\")\n",
|
246 |
+
"\n",
|
247 |
+
"# Save the trained model\n",
|
248 |
+
"model.save('cnn_tumor_model.h5')\n",
|
249 |
+
"print(\"Model Saved.\\n\")"
|
250 |
+
]
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"cell_type": "code",
|
254 |
+
"execution_count": 9,
|
255 |
+
"metadata": {},
|
256 |
+
"outputs": [
|
257 |
+
{
|
258 |
+
"data": {
|
259 |
+
"image/png": "",
|
260 |
+
"text/plain": [
|
261 |
+
"<Figure size 640x480 with 1 Axes>"
|
262 |
+
]
|
263 |
+
},
|
264 |
+
"metadata": {},
|
265 |
+
"output_type": "display_data"
|
266 |
+
}
|
267 |
+
],
|
268 |
+
"source": [
|
269 |
+
"# Plot training and validation accuracy per epoch (the figure is shown, not saved)\n",
|
270 |
+
"plt.plot(history.epoch,history.history['accuracy'], label='accuracy')\n",
|
271 |
+
"plt.plot(history.epoch,history.history['val_accuracy'], label = 'val_accuracy')\n",
|
272 |
+
"plt.xlabel('Epoch')\n",
|
273 |
+
"plt.ylabel('Accuracy')\n",
|
274 |
+
"plt.ylim([0, 1])\n",
|
275 |
+
"plt.legend(loc='lower right')\n",
|
276 |
+
"plt.show()"
|
277 |
+
]
|
278 |
+
},
|
279 |
+
{
|
280 |
+
"cell_type": "code",
|
281 |
+
"execution_count": 10,
|
282 |
+
"metadata": {},
|
283 |
+
"outputs": [
|
284 |
+
{
|
285 |
+
"data": {
|
286 |
+
"image/png": "",
|
287 |
+
"text/plain": [
|
288 |
+
"<Figure size 640x480 with 1 Axes>"
|
289 |
+
]
|
290 |
+
},
|
291 |
+
"metadata": {},
|
292 |
+
"output_type": "display_data"
|
293 |
+
}
|
294 |
+
],
|
295 |
+
"source": [
|
296 |
+
"# Plot training and validation loss per epoch (the figure is shown, not saved)\n",
|
297 |
+
"plt.plot(history.epoch,history.history['loss'], label='loss')\n",
|
298 |
+
"plt.plot(history.epoch,history.history['val_loss'], label = 'val_loss')\n",
|
299 |
+
"plt.xlabel('Epoch')\n",
|
300 |
+
"plt.ylabel('Loss')\n",
|
301 |
+
"plt.legend(loc='upper right')\n",
|
302 |
+
"plt.show()"
|
303 |
+
]
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"cell_type": "code",
|
307 |
+
"execution_count": 11,
|
308 |
+
"metadata": {},
|
309 |
+
"outputs": [
|
310 |
+
{
|
311 |
+
"name": "stdout",
|
312 |
+
"output_type": "stream",
|
313 |
+
"text": [
|
314 |
+
"Model Evalutaion Phase.\n",
|
315 |
+
"\n",
|
316 |
+
"19/19 [==============================] - 1s 54ms/step - loss: 0.1471 - accuracy: 0.9483\n",
|
317 |
+
"Accuracy: 94.83\n",
|
318 |
+
"19/19 [==============================] - 1s 50ms/step\n",
|
319 |
+
"classification Report\n",
|
320 |
+
" precision recall f1-score support\n",
|
321 |
+
"\n",
|
322 |
+
" 0 0.96 0.94 0.95 313\n",
|
323 |
+
" 1 0.94 0.96 0.95 287\n",
|
324 |
+
"\n",
|
325 |
+
" accuracy 0.95 600\n",
|
326 |
+
" macro avg 0.95 0.95 0.95 600\n",
|
327 |
+
"weighted avg 0.95 0.95 0.95 600\n",
|
328 |
+
"\n"
|
329 |
+
]
|
330 |
+
}
|
331 |
+
],
|
332 |
+
"source": [
|
333 |
+
"# Model evaluation\n",
|
334 |
+
"print(\"Model Evalutaion Phase.\\n\")\n",
|
335 |
+
"loss,accuracy=model.evaluate(x_test,y_test)\n",
|
336 |
+
"print(f'Accuracy: {round(accuracy*100,2)}')\n",
|
337 |
+
"y_pred=model.predict(x_test)\n",
|
338 |
+
"y_pred = (y_pred > 0.5).astype(int)\n",
|
339 |
+
"print('classification Report\\n',classification_report(y_test,y_pred))"
|
340 |
+
]
|
341 |
+
},
|
342 |
+
{
|
343 |
+
"cell_type": "code",
|
344 |
+
"execution_count": 12,
|
345 |
+
"metadata": {},
|
346 |
+
"outputs": [
|
347 |
+
{
|
348 |
+
"name": "stdout",
|
349 |
+
"output_type": "stream",
|
350 |
+
"text": [
|
351 |
+
"Model Prediction.\n",
|
352 |
+
"\n"
|
353 |
+
]
|
354 |
+
}
|
355 |
+
],
|
356 |
+
"source": [
|
357 |
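+
"# Model prediction function. NOTE(review): `if res:` below tests the truthiness of the raw model.predict output instead of comparing it with a threshold such as 0.5, and the image is not passed through tf.keras.utils.normalize as the training data was - confirm before relying on the result\n",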
|
358 |
+
"print(\"Model Prediction.\\n\")\n",
|
359 |
+
"\n",
|
360 |
+
"def make_prediction(img,model):\n",
|
361 |
+
" img=cv2.imread(img)\n",
|
362 |
+
" img=Image.fromarray(img)\n",
|
363 |
+
" img=img.resize((128,128))\n",
|
364 |
+
" img=np.array(img)\n",
|
365 |
+
" input_img = np.expand_dims(img, axis=0)\n",
|
366 |
+
" res = model.predict(input_img)\n",
|
367 |
+
" if res:\n",
|
368 |
+
" print(\"Tumor Detected\")\n",
|
369 |
+
" else:\n",
|
370 |
+
" print(\"No Tumor\")\n",
|
371 |
+
" "
|
372 |
+
]
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"cell_type": "code",
|
376 |
+
"execution_count": 13,
|
377 |
+
"metadata": {},
|
378 |
+
"outputs": [
|
379 |
+
{
|
380 |
+
"name": "stdout",
|
381 |
+
"output_type": "stream",
|
382 |
+
"text": [
|
383 |
+
"1/1 [==============================] - 0s 65ms/step\n",
|
384 |
+
"Tumor Detected\n",
|
385 |
+
"--------------------------------------\n",
|
386 |
+
"\n",
|
387 |
+
"1/1 [==============================] - 0s 33ms/step\n",
|
388 |
+
"No Tumor\n",
|
389 |
+
"--------------------------------------\n",
|
390 |
+
"\n"
|
391 |
+
]
|
392 |
+
}
|
393 |
+
],
|
394 |
+
"source": [
|
395 |
+
"\n",
|
396 |
+
"\n",
|
397 |
+
"make_prediction(r'D:\\STUDY\\Sem3\\deeplearning\\Multitasking_app\\tumordata\\yes\\y6.jpg',model)\n",
|
398 |
+
"print(\"--------------------------------------\\n\")\n",
|
399 |
+
"make_prediction(r'D:\\STUDY\\Sem3\\deeplearning\\Multitasking_app\\tumordata\\no\\no1.jpg',model)\n",
|
400 |
+
"print(\"--------------------------------------\\n\")"
|
401 |
+
]
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"cell_type": "code",
|
405 |
+
"execution_count": 14,
|
406 |
+
"metadata": {},
|
407 |
+
"outputs": [],
|
408 |
+
"source": [
|
409 |
+
"# Load the saved model in your Streamlit app using this code\n",
|
410 |
+
"from tensorflow.keras.models import load_model\n",
|
411 |
+
"\n",
|
412 |
+
"loaded_model = load_model('cnn_tumor_model.h5')\n"
|
413 |
+
]
|
414 |
+
},
|
415 |
+
{
|
416 |
+
"cell_type": "code",
|
417 |
+
"execution_count": null,
|
418 |
+
"metadata": {},
|
419 |
+
"outputs": [],
|
420 |
+
"source": []
|
421 |
+
}
|
422 |
+
],
|
423 |
+
"metadata": {
|
424 |
+
"kernelspec": {
|
425 |
+
"display_name": "DLENV",
|
426 |
+
"language": "python",
|
427 |
+
"name": "python3"
|
428 |
+
},
|
429 |
+
"language_info": {
|
430 |
+
"codemirror_mode": {
|
431 |
+
"name": "ipython",
|
432 |
+
"version": 3
|
433 |
+
},
|
434 |
+
"file_extension": ".py",
|
435 |
+
"mimetype": "text/x-python",
|
436 |
+
"name": "python",
|
437 |
+
"nbconvert_exporter": "python",
|
438 |
+
"pygments_lexer": "ipython3",
|
439 |
+
"version": "3.10.11"
|
440 |
+
}
|
441 |
+
},
|
442 |
+
"nbformat": 4,
|
443 |
+
"nbformat_minor": 2
|
444 |
+
}
|
rnn_smsspam_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:655848356d2db2b7e21e6bee381bd951b0388c433c4772088c3f7e0e7bd8da38
|
3 |
+
size 2269016
|
rnn_smsspam_model.ipynb
ADDED
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"### SMS SPAM DETECTION USING RNN"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"import pandas as pd\n",
|
17 |
+
"import matplotlib.pyplot as plt\n",
|
18 |
+
"import seaborn as sns\n",
|
19 |
+
"from sklearn.model_selection import train_test_split\n",
|
20 |
+
"import tensorflow as tf\n",
|
21 |
+
"from sklearn.metrics import classification_report, confusion_matrix, accuracy_score\n",
|
22 |
+
"import pickle"
|
23 |
+
]
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"cell_type": "code",
|
27 |
+
"execution_count": 2,
|
28 |
+
"metadata": {},
|
29 |
+
"outputs": [],
|
30 |
+
"source": [
|
31 |
+
"# Load the dataset from the local tab-separated file\n",
|
32 |
+
"dataset = pd.read_csv(r'SMSSpamCollection.txt', sep='\\t', names=['label', 'message'])"
|
33 |
+
]
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"cell_type": "code",
|
37 |
+
"execution_count": 3,
|
38 |
+
"metadata": {},
|
39 |
+
"outputs": [
|
40 |
+
{
|
41 |
+
"name": "stdout",
|
42 |
+
"output_type": "stream",
|
43 |
+
"text": [
|
44 |
+
" label message\n",
|
45 |
+
"0 ham Go until jurong point, crazy.. Available only ...\n",
|
46 |
+
"1 ham Ok lar... Joking wif u oni...\n",
|
47 |
+
"2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
|
48 |
+
"3 ham U dun say so early hor... U c already then say...\n",
|
49 |
+
"4 ham Nah I don't think he goes to usf, he lives aro...\n",
|
50 |
+
"---------------------- -------------------------\n",
|
51 |
+
" message \n",
|
52 |
+
" count unique top freq\n",
|
53 |
+
"label \n",
|
54 |
+
"ham 4825 4516 Sorry, I'll call later 30\n",
|
55 |
+
"spam 747 653 Please call our customer service representativ... 4\n"
|
56 |
+
]
|
57 |
+
}
|
58 |
+
],
|
59 |
+
"source": [
|
60 |
+
"print(dataset.head())\n",
|
61 |
+
"print(\"---------------------- -------------------------\")\n",
|
62 |
+
"print(dataset.groupby('label').describe())"
|
63 |
+
]
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"cell_type": "code",
|
67 |
+
"execution_count": 4,
|
68 |
+
"metadata": {},
|
69 |
+
"outputs": [],
|
70 |
+
"source": [
|
71 |
+
"# Preprocessing\n",
|
72 |
+
"dataset['label'] = dataset['label'].map({'spam': 1, 'ham': 0})\n",
|
73 |
+
"X = dataset['message'].values\n",
|
74 |
+
"y = dataset['label'].values"
|
75 |
+
]
|
76 |
+
},
|
77 |
+
{
|
78 |
+
"cell_type": "code",
|
79 |
+
"execution_count": 5,
|
80 |
+
"metadata": {},
|
81 |
+
"outputs": [
|
82 |
+
{
|
83 |
+
"name": "stdout",
|
84 |
+
"output_type": "stream",
|
85 |
+
"text": [
|
86 |
+
"[[387, 245, 325, 450, 917, 432, 1, 1323, 169, 2377], [19, 4, 1021, 112, 93, 6, 40, 358]]\n"
|
87 |
+
]
|
88 |
+
}
|
89 |
+
],
|
90 |
+
"source": [
|
91 |
+
"# Train Test Split\n",
|
92 |
+
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
|
93 |
+
"\n",
|
94 |
+
"tokeniser = tf.keras.preprocessing.text.Tokenizer()\n",
|
95 |
+
"tokeniser.fit_on_texts(X_train)\n",
|
96 |
+
"\n",
|
97 |
+
"# Save the tokenizer using pickle\n",
|
98 |
+
"with open('rnn_smsspam_tokenizer.pickle', 'wb') as handle:\n",
|
99 |
+
" pickle.dump(tokeniser, handle, protocol=pickle.HIGHEST_PROTOCOL)\n",
|
100 |
+
"\n",
|
101 |
+
"encoded_train = tokeniser.texts_to_sequences(X_train)\n",
|
102 |
+
"encoded_test = tokeniser.texts_to_sequences(X_test)\n",
|
103 |
+
"print(encoded_train[0:2])"
|
104 |
+
]
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"execution_count": 6,
|
109 |
+
"metadata": {},
|
110 |
+
"outputs": [
|
111 |
+
{
|
112 |
+
"name": "stdout",
|
113 |
+
"output_type": "stream",
|
114 |
+
"text": [
|
115 |
+
"[[ 14 61 388 540 3557 23 3558 0 0 0 0 0 0 0\n",
|
116 |
+
" 0 0 0 0 0 0]\n",
|
117 |
+
" [ 474 59 35 10 61 22 63 75 76 0 0 0 0 0\n",
|
118 |
+
" 0 0 0 0 0 0]\n",
|
119 |
+
" [ 36 727 180 26 3559 2396 452 41 9 1850 0 0 0 0\n",
|
120 |
+
" 0 0 0 0 0 0]\n",
|
121 |
+
" [ 518 2397 158 73 243 10 48 92 0 0 0 0 0 0\n",
|
122 |
+
" 0 0 0 0 0 0]]\n"
|
123 |
+
]
|
124 |
+
}
|
125 |
+
],
|
126 |
+
"source": [
|
127 |
+
"# Padding\n",
|
128 |
+
"max_length = 20\n",
|
129 |
+
"padded_train = tf.keras.preprocessing.sequence.pad_sequences(encoded_train, maxlen=max_length, padding='post')\n",
|
130 |
+
"padded_test = tf.keras.preprocessing.sequence.pad_sequences(encoded_test, maxlen=max_length, padding='post')\n",
|
131 |
+
"print(padded_train[30:34])"
|
132 |
+
]
|
133 |
+
},
|
134 |
+
{
|
135 |
+
"cell_type": "code",
|
136 |
+
"execution_count": 7,
|
137 |
+
"metadata": {},
|
138 |
+
"outputs": [],
|
139 |
+
"source": [
|
140 |
+
"vocab_size = len(tokeniser.word_index) + 1"
|
141 |
+
]
|
142 |
+
},
|
143 |
+
{
|
144 |
+
"cell_type": "code",
|
145 |
+
"execution_count": 8,
|
146 |
+
"metadata": {},
|
147 |
+
"outputs": [],
|
148 |
+
"source": [
|
149 |
+
"# Model definition\n",
|
150 |
+
"model=tf.keras.models.Sequential([\n",
|
151 |
+
" tf.keras.layers.Embedding(input_dim=vocab_size,output_dim= 24, input_length=max_length),\n",
|
152 |
+
" tf.keras.layers.SimpleRNN(24, return_sequences=False),\n",
|
153 |
+
" tf.keras.layers.Dense(64, activation='relu'),\n",
|
154 |
+
" tf.keras.layers.Dense(32, activation='relu'),\n",
|
155 |
+
" tf.keras.layers.Dense(1, activation='sigmoid')\n",
|
156 |
+
"])"
|
157 |
+
]
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"cell_type": "code",
|
161 |
+
"execution_count": 9,
|
162 |
+
"metadata": {},
|
163 |
+
"outputs": [
|
164 |
+
{
|
165 |
+
"name": "stdout",
|
166 |
+
"output_type": "stream",
|
167 |
+
"text": [
|
168 |
+
"Model: \"sequential\"\n",
|
169 |
+
"_________________________________________________________________\n",
|
170 |
+
" Layer (type) Output Shape Param # \n",
|
171 |
+
"=================================================================\n",
|
172 |
+
" embedding (Embedding) (None, 20, 24) 180048 \n",
|
173 |
+
" \n",
|
174 |
+
" simple_rnn (SimpleRNN) (None, 24) 1176 \n",
|
175 |
+
" \n",
|
176 |
+
" dense (Dense) (None, 64) 1600 \n",
|
177 |
+
" \n",
|
178 |
+
" dense_1 (Dense) (None, 32) 2080 \n",
|
179 |
+
" \n",
|
180 |
+
" dense_2 (Dense) (None, 1) 33 \n",
|
181 |
+
" \n",
|
182 |
+
"=================================================================\n",
|
183 |
+
"Total params: 184937 (722.41 KB)\n",
|
184 |
+
"Trainable params: 184937 (722.41 KB)\n",
|
185 |
+
"Non-trainable params: 0 (0.00 Byte)\n",
|
186 |
+
"_________________________________________________________________\n",
|
187 |
+
"None\n"
|
188 |
+
]
|
189 |
+
}
|
190 |
+
],
|
191 |
+
"source": [
|
192 |
+
"# compile the model\n",
|
193 |
+
"model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])\n",
|
194 |
+
"\n",
|
195 |
+
"# summarize the model\n",
|
196 |
+
"print(model.summary())\n",
|
197 |
+
"\n",
|
198 |
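+
"# Early stopping callback. NOTE(review): mode='min' on 'accuracy' treats a falling training accuracy as improvement, so training halts once accuracy has not dropped for `patience` epochs; monitor='val_loss' (or mode='max') is probably intended - confirm\n",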
|
199 |
+
"early_stop = tf.keras.callbacks.EarlyStopping(monitor='accuracy', mode='min', patience=10)"
|
200 |
+
]
|
201 |
+
},
|
202 |
+
{
|
203 |
+
"cell_type": "code",
|
204 |
+
"execution_count": 10,
|
205 |
+
"metadata": {},
|
206 |
+
"outputs": [
|
207 |
+
{
|
208 |
+
"name": "stdout",
|
209 |
+
"output_type": "stream",
|
210 |
+
"text": [
|
211 |
+
"Epoch 1/50\n",
|
212 |
+
"122/122 [==============================] - 2s 6ms/step - loss: 0.3228 - accuracy: 0.8815 - val_loss: 0.1136 - val_accuracy: 0.9689\n",
|
213 |
+
"Epoch 2/50\n",
|
214 |
+
"122/122 [==============================] - 1s 4ms/step - loss: 0.0584 - accuracy: 0.9851 - val_loss: 0.0625 - val_accuracy: 0.9803\n",
|
215 |
+
"Epoch 3/50\n",
|
216 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 0.0212 - accuracy: 0.9951 - val_loss: 0.0749 - val_accuracy: 0.9833\n",
|
217 |
+
"Epoch 4/50\n",
|
218 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 0.0110 - accuracy: 0.9974 - val_loss: 0.0591 - val_accuracy: 0.9850\n",
|
219 |
+
"Epoch 5/50\n",
|
220 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 0.0090 - accuracy: 0.9974 - val_loss: 0.0606 - val_accuracy: 0.9862\n",
|
221 |
+
"Epoch 6/50\n",
|
222 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 0.0059 - accuracy: 0.9985 - val_loss: 0.0905 - val_accuracy: 0.9827\n",
|
223 |
+
"Epoch 7/50\n",
|
224 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 0.0021 - accuracy: 0.9995 - val_loss: 0.0832 - val_accuracy: 0.9809\n",
|
225 |
+
"Epoch 8/50\n",
|
226 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 0.0016 - accuracy: 0.9995 - val_loss: 0.0882 - val_accuracy: 0.9773\n",
|
227 |
+
"Epoch 9/50\n",
|
228 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 6.3037e-04 - accuracy: 1.0000 - val_loss: 0.0881 - val_accuracy: 0.9839\n",
|
229 |
+
"Epoch 10/50\n",
|
230 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 5.9414e-05 - accuracy: 1.0000 - val_loss: 0.1063 - val_accuracy: 0.9809\n",
|
231 |
+
"Epoch 11/50\n",
|
232 |
+
"122/122 [==============================] - 1s 5ms/step - loss: 3.9679e-05 - accuracy: 1.0000 - val_loss: 0.1138 - val_accuracy: 0.9803\n"
|
233 |
+
]
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"data": {
|
237 |
+
"text/plain": [
|
238 |
+
"<keras.src.callbacks.History at 0x1ff2a08e410>"
|
239 |
+
]
|
240 |
+
},
|
241 |
+
"execution_count": 10,
|
242 |
+
"metadata": {},
|
243 |
+
"output_type": "execute_result"
|
244 |
+
}
|
245 |
+
],
|
246 |
+
"source": [
|
247 |
+
"# Model training\n",
|
248 |
+
"model.fit(x=padded_train,\n",
|
249 |
+
" y=y_train,\n",
|
250 |
+
" epochs=50,\n",
|
251 |
+
" validation_data=(padded_test, y_test),\n",
|
252 |
+
" callbacks=[early_stop]\n",
|
253 |
+
" )"
|
254 |
+
]
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"cell_type": "code",
|
258 |
+
"execution_count": 11,
|
259 |
+
"metadata": {},
|
260 |
+
"outputs": [
|
261 |
+
{
|
262 |
+
"name": "stdout",
|
263 |
+
"output_type": "stream",
|
264 |
+
"text": [
|
265 |
+
"53/53 [==============================] - 0s 1ms/step\n"
|
266 |
+
]
|
267 |
+
}
|
268 |
+
],
|
269 |
+
"source": [
|
270 |
+
"# Generate predictions after model training\n",
|
271 |
+
"preds = (model.predict(padded_test) > 0.5).astype(\"int32\")"
|
272 |
+
]
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"cell_type": "code",
|
276 |
+
"execution_count": 12,
|
277 |
+
"metadata": {},
|
278 |
+
"outputs": [
|
279 |
+
{
|
280 |
+
"name": "stdout",
|
281 |
+
"output_type": "stream",
|
282 |
+
"text": [
|
283 |
+
"Classification Report\n",
|
284 |
+
" precision recall f1-score support\n",
|
285 |
+
"\n",
|
286 |
+
" 0 0.99 0.99 0.99 1448\n",
|
287 |
+
" 1 0.94 0.91 0.92 224\n",
|
288 |
+
"\n",
|
289 |
+
" accuracy 0.98 1672\n",
|
290 |
+
" macro avg 0.96 0.95 0.96 1672\n",
|
291 |
+
"weighted avg 0.98 0.98 0.98 1672\n",
|
292 |
+
"\n",
|
293 |
+
"Accuracy : 98.03\n"
|
294 |
+
]
|
295 |
+
}
|
296 |
+
],
|
297 |
+
"source": [
|
298 |
+
"# Classification report\n",
|
299 |
+
"print(\"Classification Report\")\n",
|
300 |
+
"print(classification_report(y_test, preds))\n",
|
301 |
+
"\n",
|
302 |
+
"# Accuracy score\n",
|
303 |
+
"acc_sc = accuracy_score(y_test, preds)\n",
|
304 |
+
"print(f\"Accuracy : {round(acc_sc * 100, 2)}\")"
|
305 |
+
]
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"cell_type": "code",
|
309 |
+
"execution_count": 13,
|
310 |
+
"metadata": {},
|
311 |
+
"outputs": [
|
312 |
+
{
|
313 |
+
"data": {
|
314 |
+
"image/png": "",
|
315 |
+
"text/plain": [
|
316 |
+
"<Figure size 640x480 with 1 Axes>"
|
317 |
+
]
|
318 |
+
},
|
319 |
+
"metadata": {},
|
320 |
+
"output_type": "display_data"
|
321 |
+
}
|
322 |
+
],
|
323 |
+
"source": [
|
324 |
+
"# Confusion matrix plotting\n",
|
325 |
+
"mtx = confusion_matrix(y_test, preds)\n",
|
326 |
+
"sns.heatmap(mtx, annot=True, fmt='d', linewidths=.5, cmap=\"Blues\", cbar=False)\n",
|
327 |
+
"plt.ylabel('True label')\n",
|
328 |
+
"plt.xlabel('Predicted label')\n",
|
329 |
+
"plt.show() # Display the plot"
|
330 |
+
]
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"cell_type": "code",
|
334 |
+
"execution_count": 14,
|
335 |
+
"metadata": {},
|
336 |
+
"outputs": [
|
337 |
+
{
|
338 |
+
"name": "stderr",
|
339 |
+
"output_type": "stream",
|
340 |
+
"text": [
|
341 |
+
"d:\\STUDY\\Sem3\\deeplearning\\DLENV\\lib\\site-packages\\keras\\src\\engine\\training.py:3079: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n",
|
342 |
+
" saving_api.save_model(\n"
|
343 |
+
]
|
344 |
+
}
|
345 |
+
],
|
346 |
+
"source": [
|
347 |
+
"# Save the trained model\n",
|
348 |
+
"model.save(\"rnn_smsspam_model.h5\")\n",
|
349 |
+
"rnn_smsspam_model = tf.keras.models.load_model('rnn_smsspam_model.h5')"
|
350 |
+
]
|
351 |
+
},
|
352 |
+
{
|
353 |
+
"cell_type": "code",
|
354 |
+
"execution_count": 17,
|
355 |
+
"metadata": {},
|
356 |
+
"outputs": [],
|
357 |
+
"source": [
|
358 |
+
"def predict_message(input_text):\n",
|
359 |
+
" # Process input text similarly to training data\n",
|
360 |
+
" encoded_input = tokeniser.texts_to_sequences([input_text])\n",
|
361 |
+
" padded_input = tf.keras.preprocessing.sequence.pad_sequences(encoded_input, maxlen=max_length, padding='post')\n",
|
362 |
+
" \n",
|
363 |
+
" # Get the probabilities of being classified as \"Spam\" for each input\n",
|
364 |
+
" predictions = rnn_smsspam_model.predict(padded_input)\n",
|
365 |
+
" \n",
|
366 |
+
" # Define a threshold (e.g., 0.5) for classification\n",
|
367 |
+
" threshold = 0.5\n",
|
368 |
+
"\n",
|
369 |
+
" # Make the predictions based on the threshold for each input\n",
|
370 |
+
" results = []\n",
|
371 |
+
" for prediction in predictions:\n",
|
372 |
+
" if prediction > threshold:\n",
|
373 |
+
" results.append(\"Spam\")\n",
|
374 |
+
" else:\n",
|
375 |
+
" results.append(\"Not spam\")\n",
|
376 |
+
" \n",
|
377 |
+
" return results\n"
|
378 |
+
]
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"cell_type": "code",
|
382 |
+
"execution_count": 18,
|
383 |
+
"metadata": {},
|
384 |
+
"outputs": [
|
385 |
+
{
|
386 |
+
"name": "stdout",
|
387 |
+
"output_type": "stream",
|
388 |
+
"text": [
|
389 |
+
"1/1 [==============================] - 0s 149ms/step\n",
|
390 |
+
"Message: Your free ringtone is waiting to be collected. Simply text the password \"MIX\" to 85069 to verify. Get Usher and Britney. FML, PO Box 5249, MK17 92H. 450Ppw 16 haWatching telugu movie..wat abt u? \n",
|
391 |
+
"The message is classified as: ['Spam']\n"
|
392 |
+
]
|
393 |
+
}
|
394 |
+
],
|
395 |
+
"source": [
|
396 |
+
"# Classify a hard-coded sample message (no interactive input is read)\n",
|
397 |
+
"user_input =('Your free ringtone is waiting to be collected. Simply text the password \"MIX\" to 85069 to verify. Get Usher and Britney. FML, PO Box 5249, MK17 92H. 450Ppw 16 haWatching telugu movie..wat abt u?')\n",
|
398 |
+
"prediction_result = predict_message(user_input)\n",
|
399 |
+
"print(f\"Message: {user_input} \\nThe message is classified as: {prediction_result}\")"
|
400 |
+
]
|
401 |
+
},
|
402 |
+
{
|
403 |
+
"cell_type": "code",
|
404 |
+
"execution_count": 19,
|
405 |
+
"metadata": {},
|
406 |
+
"outputs": [
|
407 |
+
{
|
408 |
+
"name": "stdout",
|
409 |
+
"output_type": "stream",
|
410 |
+
"text": [
|
411 |
+
"1/1 [==============================] - 0s 20ms/step\n",
|
412 |
+
"Message: XXXMobileMovieClub: To use your credit, click the WAP link in the next txt message or click here>> http://wap. xxxmobilemovieclub.com?n=QJKGIGHJJGCBL \n",
|
413 |
+
"The message is classified as: ['Spam']\n"
|
414 |
+
]
|
415 |
+
}
|
416 |
+
],
|
417 |
+
"source": [
|
418 |
+
"\n",
|
419 |
+
"user_input_1 = ('XXXMobileMovieClub: To use your credit, click the WAP link in the next txt message or click here>> http://wap. xxxmobilemovieclub.com?n=QJKGIGHJJGCBL')\n",
|
420 |
+
"\n",
|
421 |
+
"\n",
|
422 |
+
"prediction_result_1 = predict_message(user_input_1)\n",
|
423 |
+
"print(f\"Message: {user_input_1} \\nThe message is classified as: {prediction_result_1}\")\n",
|
424 |
+
" "
|
425 |
+
]
|
426 |
+
},
|
427 |
+
{
|
428 |
+
"cell_type": "code",
|
429 |
+
"execution_count": 21,
|
430 |
+
"metadata": {},
|
431 |
+
"outputs": [
|
432 |
+
{
|
433 |
+
"name": "stdout",
|
434 |
+
"output_type": "stream",
|
435 |
+
"text": [
|
436 |
+
"1/1 [==============================] - 0s 20ms/step\n",
|
437 |
+
"Message: Hi i want to speak to you \n",
|
438 |
+
"The message is classified as: ['Not spam']\n"
|
439 |
+
]
|
440 |
+
}
|
441 |
+
],
|
442 |
+
"source": [
|
443 |
+
"user_input= ('Hi i want to speak to you')\n",
|
444 |
+
"\n",
|
445 |
+
"\n",
|
446 |
+
"prediction_result= predict_message(user_input)\n",
|
447 |
+
"print(f\"Message: {user_input} \\nThe message is classified as: {prediction_result}\")"
|
448 |
+
]
|
449 |
+
},
|
450 |
+
{
|
451 |
+
"cell_type": "code",
|
452 |
+
"execution_count": null,
|
453 |
+
"metadata": {},
|
454 |
+
"outputs": [],
|
455 |
+
"source": []
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"cell_type": "code",
|
459 |
+
"execution_count": null,
|
460 |
+
"metadata": {},
|
461 |
+
"outputs": [],
|
462 |
+
"source": []
|
463 |
+
}
|
464 |
+
],
|
465 |
+
"metadata": {
|
466 |
+
"kernelspec": {
|
467 |
+
"display_name": "DLENV",
|
468 |
+
"language": "python",
|
469 |
+
"name": "python3"
|
470 |
+
},
|
471 |
+
"language_info": {
|
472 |
+
"codemirror_mode": {
|
473 |
+
"name": "ipython",
|
474 |
+
"version": 3
|
475 |
+
},
|
476 |
+
"file_extension": ".py",
|
477 |
+
"mimetype": "text/x-python",
|
478 |
+
"name": "python",
|
479 |
+
"nbconvert_exporter": "python",
|
480 |
+
"pygments_lexer": "ipython3",
|
481 |
+
"version": "3.10.11"
|
482 |
+
}
|
483 |
+
},
|
484 |
+
"nbformat": 4,
|
485 |
+
"nbformat_minor": 2
|
486 |
+
}
|
rnn_smsspam_tokenizer.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6a5664ae5e54ced758c4c3749a6d0d76c4fee35b853d4351f3596e06cde21e6
|
3 |
+
size 290462
|