KarthikaRajagopal committed on
Commit
43bbc2e
·
verified ·
1 Parent(s): 71f8089

Upload Sentiment Analysis of Restaurant Reviews.ipynb

Browse files
Sentiment Analysis of Restaurant Reviews.ipynb ADDED
@@ -0,0 +1,776 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "name": "Sentiment Analysis - Restaurant Reviews.ipynb",
7
+ "provenance": [],
8
+ "collapsed_sections": [],
9
+ "toc_visible": true
10
+ },
11
+ "kernelspec": {
12
+ "name": "python3",
13
+ "display_name": "Python 3"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "metadata": {
20
+ "id": "kh4udnC9fZyU",
21
+ "colab_type": "code",
22
+ "outputId": "677fbeb5-d5b2-49f7-99bf-92bd1f2fa44e",
23
+ "colab": {
24
+ "base_uri": "https://localhost:8080/",
25
+ "height": 34
26
+ }
27
+ },
28
+ "source": [
29
+ "# Connecting Google Drive with Google Colab\n",
30
+ "from google.colab import drive\n",
31
+ "drive.mount('/content/drive/')"
32
+ ],
33
+ "execution_count": 1,
34
+ "outputs": [
35
+ {
36
+ "output_type": "stream",
37
+ "text": [
38
+ "Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount(\"/content/drive/\", force_remount=True).\n"
39
+ ],
40
+ "name": "stdout"
41
+ }
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "metadata": {
47
+ "id": "wqtOguIVfysM",
48
+ "colab_type": "code",
49
+ "colab": {}
50
+ },
51
+ "source": [
52
+ "# Importing essential libraries\n",
53
+ "import numpy as np\n",
54
+ "import pandas as pd"
55
+ ],
56
+ "execution_count": 0,
57
+ "outputs": []
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "metadata": {
62
+ "id": "FsZFCtjijekC",
63
+ "colab_type": "code",
64
+ "colab": {}
65
+ },
66
+ "source": [
67
+ "# Loading the dataset\n",
68
+ "df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Datasets/Restaurant_Reviews.tsv', delimiter='\\t', quoting=3)"
69
+ ],
70
+ "execution_count": 0,
71
+ "outputs": []
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "metadata": {
76
+ "id": "zkdfWSlej05y",
77
+ "colab_type": "code",
78
+ "outputId": "26f108a7-5617-4abe-efae-0d64d31e8041",
79
+ "colab": {
80
+ "base_uri": "https://localhost:8080/",
81
+ "height": 34
82
+ }
83
+ },
84
+ "source": [
85
+ "df.shape"
86
+ ],
87
+ "execution_count": 4,
88
+ "outputs": [
89
+ {
90
+ "output_type": "execute_result",
91
+ "data": {
92
+ "text/plain": [
93
+ "(1000, 2)"
94
+ ]
95
+ },
96
+ "metadata": {
97
+ "tags": []
98
+ },
99
+ "execution_count": 4
100
+ }
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "metadata": {
106
+ "id": "SyYImhASubeb",
107
+ "colab_type": "code",
108
+ "outputId": "2c8efdb6-17a5-48da-8ac2-7c9d2c289b09",
109
+ "colab": {
110
+ "base_uri": "https://localhost:8080/",
111
+ "height": 34
112
+ }
113
+ },
114
+ "source": [
115
+ "df.columns"
116
+ ],
117
+ "execution_count": 5,
118
+ "outputs": [
119
+ {
120
+ "output_type": "execute_result",
121
+ "data": {
122
+ "text/plain": [
123
+ "Index(['Review', 'Liked'], dtype='object')"
124
+ ]
125
+ },
126
+ "metadata": {
127
+ "tags": []
128
+ },
129
+ "execution_count": 5
130
+ }
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "metadata": {
136
+ "id": "b5lzlG5DMNX9",
137
+ "colab_type": "code",
138
+ "outputId": "ab125608-7f10-479c-8dab-bb298fa7bbaf",
139
+ "colab": {
140
+ "base_uri": "https://localhost:8080/",
141
+ "height": 197
142
+ }
143
+ },
144
+ "source": [
145
+ "df.head()"
146
+ ],
147
+ "execution_count": 6,
148
+ "outputs": [
149
+ {
150
+ "output_type": "execute_result",
151
+ "data": {
152
+ "text/html": [
153
+ "<div>\n",
154
+ "<style scoped>\n",
155
+ " .dataframe tbody tr th:only-of-type {\n",
156
+ " vertical-align: middle;\n",
157
+ " }\n",
158
+ "\n",
159
+ " .dataframe tbody tr th {\n",
160
+ " vertical-align: top;\n",
161
+ " }\n",
162
+ "\n",
163
+ " .dataframe thead th {\n",
164
+ " text-align: right;\n",
165
+ " }\n",
166
+ "</style>\n",
167
+ "<table border=\"1\" class=\"dataframe\">\n",
168
+ " <thead>\n",
169
+ " <tr style=\"text-align: right;\">\n",
170
+ " <th></th>\n",
171
+ " <th>Review</th>\n",
172
+ " <th>Liked</th>\n",
173
+ " </tr>\n",
174
+ " </thead>\n",
175
+ " <tbody>\n",
176
+ " <tr>\n",
177
+ " <th>0</th>\n",
178
+ " <td>Wow... Loved this place.</td>\n",
179
+ " <td>1</td>\n",
180
+ " </tr>\n",
181
+ " <tr>\n",
182
+ " <th>1</th>\n",
183
+ " <td>Crust is not good.</td>\n",
184
+ " <td>0</td>\n",
185
+ " </tr>\n",
186
+ " <tr>\n",
187
+ " <th>2</th>\n",
188
+ " <td>Not tasty and the texture was just nasty.</td>\n",
189
+ " <td>0</td>\n",
190
+ " </tr>\n",
191
+ " <tr>\n",
192
+ " <th>3</th>\n",
193
+ " <td>Stopped by during the late May bank holiday of...</td>\n",
194
+ " <td>1</td>\n",
195
+ " </tr>\n",
196
+ " <tr>\n",
197
+ " <th>4</th>\n",
198
+ " <td>The selection on the menu was great and so wer...</td>\n",
199
+ " <td>1</td>\n",
200
+ " </tr>\n",
201
+ " </tbody>\n",
202
+ "</table>\n",
203
+ "</div>"
204
+ ],
205
+ "text/plain": [
206
+ " Review Liked\n",
207
+ "0 Wow... Loved this place. 1\n",
208
+ "1 Crust is not good. 0\n",
209
+ "2 Not tasty and the texture was just nasty. 0\n",
210
+ "3 Stopped by during the late May bank holiday of... 1\n",
211
+ "4 The selection on the menu was great and so wer... 1"
212
+ ]
213
+ },
214
+ "metadata": {
215
+ "tags": []
216
+ },
217
+ "execution_count": 6
218
+ }
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "markdown",
223
+ "metadata": {
224
+ "id": "38_tPfGAr0AL",
225
+ "colab_type": "text"
226
+ },
227
+ "source": [
228
+ "# **Data Preprocessing**"
229
+ ]
230
+ },
231
+ {
232
+ "cell_type": "code",
233
+ "metadata": {
234
+ "id": "gZpsSpUAkCyH",
235
+ "colab_type": "code",
236
+ "outputId": "81a672d9-a796-4789-e2e8-36d360f9e558",
237
+ "colab": {
238
+ "base_uri": "https://localhost:8080/",
239
+ "height": 52
240
+ }
241
+ },
242
+ "source": [
243
+ "# Importing essential libraries for performing Natural Language Processing on the 'Restaurant_Reviews.tsv' dataset\n",
244
+ "import nltk\n",
245
+ "import re\n",
246
+ "nltk.download('stopwords')\n",
247
+ "from nltk.corpus import stopwords\n",
248
+ "from nltk.stem.porter import PorterStemmer"
249
+ ],
250
+ "execution_count": 7,
251
+ "outputs": [
252
+ {
253
+ "output_type": "stream",
254
+ "text": [
255
+ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
256
+ "[nltk_data] Package stopwords is already up-to-date!\n"
257
+ ],
258
+ "name": "stdout"
259
+ }
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "metadata": {
265
+ "id": "tUnp7Dr7mFwn",
266
+ "colab_type": "code",
267
+ "colab": {}
268
+ },
269
+ "source": [
270
+ "# Cleaning the reviews\n",
271
+ "corpus = []\n",
272
+ "for i in range(0,1000):\n",
273
+ "\n",
274
+ " # Replacing every non-alphabetic character in the review with a space\n",
275
+ " review = re.sub(pattern='[^a-zA-Z]',repl=' ', string=df['Review'][i])\n",
276
+ "\n",
277
+ " # Converting the entire review into lower case\n",
278
+ " review = review.lower()\n",
279
+ "\n",
280
+ " # Tokenizing the review into words\n",
281
+ " review_words = review.split()\n",
282
+ "\n",
283
+ " # Removing the stop words\n",
284
+ " review_words = [word for word in review_words if not word in set(stopwords.words('english'))]\n",
285
+ "\n",
286
+ " # Stemming the words\n",
287
+ " ps = PorterStemmer()\n",
288
+ " review = [ps.stem(word) for word in review_words]\n",
289
+ "\n",
290
+ " # Joining the stemmed words\n",
291
+ " review = ' '.join(review)\n",
292
+ "\n",
293
+ " # Appending the cleaned review to the corpus\n",
294
+ " corpus.append(review)"
295
+ ],
296
+ "execution_count": 0,
297
+ "outputs": []
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "metadata": {
302
+ "id": "6ewB2oNJ0rr9",
303
+ "colab_type": "code",
304
+ "outputId": "9f2c2e4b-adf7-4157-d573-f3383a16cee0",
305
+ "colab": {
306
+ "base_uri": "https://localhost:8080/",
307
+ "height": 194
308
+ }
309
+ },
310
+ "source": [
311
+ "corpus[0:10]"
312
+ ],
313
+ "execution_count": 9,
314
+ "outputs": [
315
+ {
316
+ "output_type": "execute_result",
317
+ "data": {
318
+ "text/plain": [
319
+ "['wow love place',\n",
320
+ " 'crust good',\n",
321
+ " 'tasti textur nasti',\n",
322
+ " 'stop late may bank holiday rick steve recommend love',\n",
323
+ " 'select menu great price',\n",
324
+ " 'get angri want damn pho',\n",
325
+ " 'honeslti tast fresh',\n",
326
+ " 'potato like rubber could tell made ahead time kept warmer',\n",
327
+ " 'fri great',\n",
328
+ " 'great touch']"
329
+ ]
330
+ },
331
+ "metadata": {
332
+ "tags": []
333
+ },
334
+ "execution_count": 9
335
+ }
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "metadata": {
341
+ "id": "spNHLhGs20LV",
342
+ "colab_type": "code",
343
+ "colab": {}
344
+ },
345
+ "source": [
346
+ "# Creating the Bag of Words model\n",
347
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
348
+ "cv = CountVectorizer(max_features=1500)\n",
349
+ "X = cv.fit_transform(corpus).toarray()\n",
350
+ "y = df.iloc[:, 1].values"
351
+ ],
352
+ "execution_count": 0,
353
+ "outputs": []
354
+ },
355
+ {
356
+ "cell_type": "markdown",
357
+ "metadata": {
358
+ "id": "jYNkfBqJ42hs",
359
+ "colab_type": "text"
360
+ },
361
+ "source": [
362
+ "# **Model Building**"
363
+ ]
364
+ },
365
+ {
366
+ "cell_type": "code",
367
+ "metadata": {
368
+ "id": "sL6FOXMx45w0",
369
+ "colab_type": "code",
370
+ "colab": {}
371
+ },
372
+ "source": [
373
+ "from sklearn.model_selection import train_test_split\n",
374
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)"
375
+ ],
376
+ "execution_count": 0,
377
+ "outputs": []
378
+ },
379
+ {
380
+ "cell_type": "code",
381
+ "metadata": {
382
+ "id": "KYTe6hjJDV8K",
383
+ "colab_type": "code",
384
+ "outputId": "56f78ef1-3f7f-40ce-cf1c-15a2b91b61c3",
385
+ "colab": {
386
+ "base_uri": "https://localhost:8080/",
387
+ "height": 34
388
+ }
389
+ },
390
+ "source": [
391
+ "# Fitting Naive Bayes to the Training set\n",
392
+ "from sklearn.naive_bayes import MultinomialNB\n",
393
+ "classifier = MultinomialNB()\n",
394
+ "classifier.fit(X_train, y_train)"
395
+ ],
396
+ "execution_count": 12,
397
+ "outputs": [
398
+ {
399
+ "output_type": "execute_result",
400
+ "data": {
401
+ "text/plain": [
402
+ "MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
403
+ ]
404
+ },
405
+ "metadata": {
406
+ "tags": []
407
+ },
408
+ "execution_count": 12
409
+ }
410
+ ]
411
+ },
412
+ {
413
+ "cell_type": "code",
414
+ "metadata": {
415
+ "id": "CjXrDsEyDbD7",
416
+ "colab_type": "code",
417
+ "colab": {}
418
+ },
419
+ "source": [
420
+ "# Predicting the Test set results\n",
421
+ "y_pred = classifier.predict(X_test)"
422
+ ],
423
+ "execution_count": 0,
424
+ "outputs": []
425
+ },
426
+ {
427
+ "cell_type": "code",
428
+ "metadata": {
429
+ "id": "CcRU4PabPDY-",
430
+ "colab_type": "code",
431
+ "outputId": "4985115a-e9be-4447-9a22-026c59045ec9",
432
+ "colab": {
433
+ "base_uri": "https://localhost:8080/",
434
+ "height": 87
435
+ }
436
+ },
437
+ "source": [
438
+ "# Accuracy, Precision and Recall\n",
439
+ "from sklearn.metrics import accuracy_score\n",
440
+ "from sklearn.metrics import precision_score\n",
441
+ "from sklearn.metrics import recall_score\n",
442
+ "score1 = accuracy_score(y_test,y_pred)\n",
443
+ "score2 = precision_score(y_test,y_pred)\n",
444
+ "score3= recall_score(y_test,y_pred)\n",
445
+ "print(\"---- Scores ----\")\n",
446
+ "print(\"Accuracy score is: {}%\".format(round(score1*100,2)))\n",
447
+ "print(\"Precision score is: {}\".format(round(score2,2)))\n",
448
+ "print(\"Recall score is: {}\".format(round(score3,2)))"
449
+ ],
450
+ "execution_count": 14,
451
+ "outputs": [
452
+ {
453
+ "output_type": "stream",
454
+ "text": [
455
+ "---- Scores ----\n",
456
+ "Accuracy score is: 76.5%\n",
457
+ "Precision score is: 0.76\n",
458
+ "Recall score is: 0.79\n"
459
+ ],
460
+ "name": "stdout"
461
+ }
462
+ ]
463
+ },
464
+ {
465
+ "cell_type": "code",
466
+ "metadata": {
467
+ "id": "-77oRRHjDgwr",
468
+ "colab_type": "code",
469
+ "colab": {}
470
+ },
471
+ "source": [
472
+ "# Making the Confusion Matrix\n",
473
+ "from sklearn.metrics import confusion_matrix\n",
474
+ "cm = confusion_matrix(y_test, y_pred)"
475
+ ],
476
+ "execution_count": 0,
477
+ "outputs": []
478
+ },
479
+ {
480
+ "cell_type": "code",
481
+ "metadata": {
482
+ "id": "9lRKOJ-zjv3F",
483
+ "colab_type": "code",
484
+ "colab": {
485
+ "base_uri": "https://localhost:8080/",
486
+ "height": 52
487
+ },
488
+ "outputId": "b5c14f34-e062-4cf6-b899-31a5d583d62c"
489
+ },
490
+ "source": [
491
+ "cm"
492
+ ],
493
+ "execution_count": 16,
494
+ "outputs": [
495
+ {
496
+ "output_type": "execute_result",
497
+ "data": {
498
+ "text/plain": [
499
+ "array([[72, 25],\n",
500
+ " [22, 81]])"
501
+ ]
502
+ },
503
+ "metadata": {
504
+ "tags": []
505
+ },
506
+ "execution_count": 16
507
+ }
508
+ ]
509
+ },
510
+ {
511
+ "cell_type": "code",
512
+ "metadata": {
513
+ "id": "hYd9LdXmDkKb",
514
+ "colab_type": "code",
515
+ "outputId": "30c403fb-f204-42ff-a19c-eb2ecbdf8cd5",
516
+ "colab": {
517
+ "base_uri": "https://localhost:8080/",
518
+ "height": 461
519
+ }
520
+ },
521
+ "source": [
522
+ "# Plotting the confusion matrix\n",
523
+ "import matplotlib.pyplot as plt\n",
524
+ "import seaborn as sns\n",
525
+ "%matplotlib inline\n",
526
+ "\n",
527
+ "plt.figure(figsize = (10,6))\n",
528
+ "sns.heatmap(cm, annot=True, cmap=\"YlGnBu\", xticklabels=['Negative', 'Positive'], yticklabels=['Negative', 'Positive'])\n",
529
+ "plt.xlabel('Predicted values')\n",
530
+ "plt.ylabel('Actual values')"
531
+ ],
532
+ "execution_count": 17,
533
+ "outputs": [
534
+ {
535
+ "output_type": "stream",
536
+ "text": [
537
+ "/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
538
+ " import pandas.util.testing as tm\n"
539
+ ],
540
+ "name": "stderr"
541
+ },
542
+ {
543
+ "output_type": "execute_result",
544
+ "data": {
545
+ "text/plain": [
546
+ "Text(69.0, 0.5, 'Actual values')"
547
+ ]
548
+ },
549
+ "metadata": {
550
+ "tags": []
551
+ },
552
+ "execution_count": 17
553
+ },
554
+ {
555
+ "output_type": "display_data",
556
+ "data": {
557
+ "image/png": "\n",
558
+ "text/plain": [
559
+ "<Figure size 720x432 with 2 Axes>"
560
+ ]
561
+ },
562
+ "metadata": {
563
+ "tags": [],
564
+ "needs_background": "light"
565
+ }
566
+ }
567
+ ]
568
+ },
569
+ {
570
+ "cell_type": "code",
571
+ "metadata": {
572
+ "id": "LJbZKcc9jWcV",
573
+ "colab_type": "code",
574
+ "colab": {
575
+ "base_uri": "https://localhost:8080/",
576
+ "height": 230
577
+ },
578
+ "outputId": "654b7fc8-9c8e-452b-c14c-dd57c87d82ec"
579
+ },
580
+ "source": [
581
+ "# Hyperparameter tuning: selecting alpha for the Naive Bayes classifier by accuracy on the test set\n",
582
+ "best_accuracy = 0.0\n",
583
+ "alpha_val = 0.0\n",
584
+ "for i in np.arange(0.1,1.1,0.1):\n",
585
+ " temp_classifier = MultinomialNB(alpha=i)\n",
586
+ " temp_classifier.fit(X_train, y_train)\n",
587
+ " temp_y_pred = temp_classifier.predict(X_test)\n",
588
+ " score = accuracy_score(y_test, temp_y_pred)\n",
589
+ " print(\"Accuracy score for alpha={} is: {}%\".format(round(i,1), round(score*100,2)))\n",
590
+ " if score>best_accuracy:\n",
591
+ " best_accuracy = score\n",
592
+ " alpha_val = i\n",
593
+ "print('--------------------------------------------')\n",
594
+ "print('The best accuracy is {}% with alpha value as {}'.format(round(best_accuracy*100, 2), round(alpha_val,1)))"
595
+ ],
596
+ "execution_count": 18,
597
+ "outputs": [
598
+ {
599
+ "output_type": "stream",
600
+ "text": [
601
+ "Accuracy score for alpha=0.1 is: 78.0%\n",
602
+ "Accuracy score for alpha=0.2 is: 78.5%\n",
603
+ "Accuracy score for alpha=0.3 is: 78.0%\n",
604
+ "Accuracy score for alpha=0.4 is: 78.0%\n",
605
+ "Accuracy score for alpha=0.5 is: 77.5%\n",
606
+ "Accuracy score for alpha=0.6 is: 77.5%\n",
607
+ "Accuracy score for alpha=0.7 is: 77.5%\n",
608
+ "Accuracy score for alpha=0.8 is: 77.0%\n",
609
+ "Accuracy score for alpha=0.9 is: 76.5%\n",
610
+ "Accuracy score for alpha=1.0 is: 76.5%\n",
611
+ "--------------------------------------------\n",
612
+ "The best accuracy is 78.5% with alpha value as 0.2\n"
613
+ ],
614
+ "name": "stdout"
615
+ }
616
+ ]
617
+ },
618
+ {
619
+ "cell_type": "code",
620
+ "metadata": {
621
+ "id": "9BNR7SfKkDsL",
622
+ "colab_type": "code",
623
+ "colab": {
624
+ "base_uri": "https://localhost:8080/",
625
+ "height": 34
626
+ },
627
+ "outputId": "0ebe229f-009d-46fa-852c-90b758d548b6"
628
+ },
629
+ "source": [
630
+ "classifier = MultinomialNB(alpha=0.2)\n",
631
+ "classifier.fit(X_train, y_train)"
632
+ ],
633
+ "execution_count": 19,
634
+ "outputs": [
635
+ {
636
+ "output_type": "execute_result",
637
+ "data": {
638
+ "text/plain": [
639
+ "MultinomialNB(alpha=0.2, class_prior=None, fit_prior=True)"
640
+ ]
641
+ },
642
+ "metadata": {
643
+ "tags": []
644
+ },
645
+ "execution_count": 19
646
+ }
647
+ ]
648
+ },
649
+ {
650
+ "cell_type": "markdown",
651
+ "metadata": {
652
+ "id": "iYQVSu17MWgV",
653
+ "colab_type": "text"
654
+ },
655
+ "source": [
656
+ "# **Predictions**"
657
+ ]
658
+ },
659
+ {
660
+ "cell_type": "code",
661
+ "metadata": {
662
+ "id": "mYbh9DFvwmW1",
663
+ "colab_type": "code",
664
+ "colab": {}
665
+ },
666
+ "source": [
667
+ "def predict_sentiment(sample_review):\n",
668
+ " sample_review = re.sub(pattern='[^a-zA-Z]',repl=' ', string = sample_review)\n",
669
+ " sample_review = sample_review.lower()\n",
670
+ " sample_review_words = sample_review.split()\n",
671
+ " sample_review_words = [word for word in sample_review_words if not word in set(stopwords.words('english'))]\n",
672
+ " ps = PorterStemmer()\n",
673
+ " final_review = [ps.stem(word) for word in sample_review_words]\n",
674
+ " final_review = ' '.join(final_review)\n",
675
+ "\n",
676
+ " temp = cv.transform([final_review]).toarray()\n",
677
+ " return classifier.predict(temp)"
678
+ ],
679
+ "execution_count": 0,
680
+ "outputs": []
681
+ },
682
+ {
683
+ "cell_type": "code",
684
+ "metadata": {
685
+ "id": "Os0d_BZELC95",
686
+ "colab_type": "code",
687
+ "outputId": "3478b8c9-55a9-454f-aaae-b42ccc28d609",
688
+ "colab": {
689
+ "base_uri": "https://localhost:8080/",
690
+ "height": 34
691
+ }
692
+ },
693
+ "source": [
694
+ "# Predicting the sentiment of a sample review\n",
695
+ "sample_review = 'The food is really good here.'\n",
696
+ "\n",
697
+ "if predict_sentiment(sample_review):\n",
698
+ " print('This is a POSITIVE review.')\n",
699
+ "else:\n",
700
+ " print('This is a NEGATIVE review!')"
701
+ ],
702
+ "execution_count": 21,
703
+ "outputs": [
704
+ {
705
+ "output_type": "stream",
706
+ "text": [
707
+ "This is a POSITIVE review.\n"
708
+ ],
709
+ "name": "stdout"
710
+ }
711
+ ]
712
+ },
713
+ {
714
+ "cell_type": "code",
715
+ "metadata": {
716
+ "id": "A88ILf9PNAKY",
717
+ "colab_type": "code",
718
+ "outputId": "d1fe224e-373f-4e98-9c05-da96980d4f49",
719
+ "colab": {
720
+ "base_uri": "https://localhost:8080/",
721
+ "height": 34
722
+ }
723
+ },
724
+ "source": [
725
+ "# Predicting the sentiment of a sample review\n",
726
+ "sample_review = 'Food was pretty bad and the service was very slow.'\n",
727
+ "\n",
728
+ "if predict_sentiment(sample_review):\n",
729
+ " print('This is a POSITIVE review.')\n",
730
+ "else:\n",
731
+ " print('This is a NEGATIVE review!')"
732
+ ],
733
+ "execution_count": 22,
734
+ "outputs": [
735
+ {
736
+ "output_type": "stream",
737
+ "text": [
738
+ "This is a NEGATIVE review!\n"
739
+ ],
740
+ "name": "stdout"
741
+ }
742
+ ]
743
+ },
744
+ {
745
+ "cell_type": "code",
746
+ "metadata": {
747
+ "id": "UXgRRzafOX3d",
748
+ "colab_type": "code",
749
+ "outputId": "f913faa2-38b5-48c6-f6fa-456ab807a01c",
750
+ "colab": {
751
+ "base_uri": "https://localhost:8080/",
752
+ "height": 34
753
+ }
754
+ },
755
+ "source": [
756
+ "# Predicting the sentiment of a sample review\n",
757
+ "sample_review = 'The food was absolutely wonderful, from preparation to presentation, very pleasing.'\n",
758
+ "\n",
759
+ "if predict_sentiment(sample_review):\n",
760
+ " print('This is a POSITIVE review.')\n",
761
+ "else:\n",
762
+ " print('This is a NEGATIVE review!')"
763
+ ],
764
+ "execution_count": 23,
765
+ "outputs": [
766
+ {
767
+ "output_type": "stream",
768
+ "text": [
769
+ "This is a POSITIVE review.\n"
770
+ ],
771
+ "name": "stdout"
772
+ }
773
+ ]
774
+ }
775
+ ]
776
+ }