diff --git "a/notebooks/DataCleaning_and_EDA_update (3).ipynb" "b/notebooks/DataCleaning_and_EDA_update (3).ipynb" new file mode 100644--- /dev/null +++ "b/notebooks/DataCleaning_and_EDA_update (3).ipynb" @@ -0,0 +1,10795 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VIGPNlN8sqqp" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install transformers\n", + "!pip install accelerate -U\n", + "!pip install --upgrade tensorflow\n", + "! pip install datasets\n", + "! pip install huggingface_hub\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dqgUXWuwZvJQ" + }, + "outputs": [], + "source": [ + "from google.colab import drive\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UFP95uHeTaGK" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "mount_point = \"/content/MyDrive/deep-learning\"\n", + "if not os.path.exists(mount_point):\n", + " os.makedirs(mount_point)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9IakUnxiu_1g" + }, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "##for data handling\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "##visualizations\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import plotly.express as px\n", + "import seaborn as sns\n", + "\n", + "##NLP\n", + "\n", + "import nltk\n", + "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n", + "from wordcloud import WordCloud, STOPWORDS\n", + "import re,string, unicodedata\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.stem import WordNetLemmatizer\n", + "from string import punctuation\n", + "from nltk.corpus import wordnet\n", + "from collections import Counter\n", + "import string\n", + "nltk.download('punkt')\n", + "nltk.download('wordnet')\n", + "nltk.download('maxent_ne_chunker')\n", + "nltk.download(\"words\")\n", + "import nltk\n", + "from nltk.corpus import gutenberg\n", + "from nltk import FreqDist\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9VZbCA6s9pzo" + }, + "outputs": [], + "source": [ + "train_path= '/content/drive/MyDrive/deep-learning/Train.csv'\n", + "test_path= '/content/drive/MyDrive/deep-learning/Test.csv'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8Frz5b19-VVO" + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NMGZ7qwxWHQm", + "outputId": "c2262d8e-2385-4568-8abf-13ae5ba422d0" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 62.4 ms, sys: 19 ms, total: 81.4 ms\n", + "Wall time: 545 ms\n" + ] + } + ], + "source": [ + "\n", + "%%time\n", + "df_train= pd.read_csv(train_path)\n", + "df_test= pd.read_csv(test_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "EOFVlu8obzQQ", + "outputId": "24839376-571a-47d7-f24c-7b764f648f85" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + " | tweet_id | \n", + "safe_text | \n", + "label | \n", + "agreement | \n", + "
---|---|---|---|---|
0 | \n", + "CL1KWCMY | \n", + "Me & The Big Homie meanboy3000 #MEANBOY #M... | \n", + "0.0 | \n", + "1.0 | \n", + "
1 | \n", + "E3303EME | \n", + "I'm 100% thinking of devoting my career to pro... | \n", + "1.0 | \n", + "1.0 | \n", + "
2 | \n", + "M4IVFSMS | \n", + "#whatcausesautism VACCINES, DO NOT VACCINATE Y... | \n", + "-1.0 | \n", + "1.0 | \n", + "
3 | \n", + "1DR6ROZ4 | \n", + "I mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.0 | \n", + "
4 | \n", + "J77ENIIE | \n", + "Thanks to <user> Catch me performing at La Nui... | \n", + "0.0 | \n", + "1.0 | \n", + "
\n", + " | tweet_id | \n", + "safe_text | \n", + "
---|---|---|
0 | \n", + "00BHHHP1 | \n", + "<user> <user> ... & 4 a vaccine given 2 he... | \n", + "
1 | \n", + "00UNMD0E | \n", + "Students starting school without whooping coug... | \n", + "
2 | \n", + "01AXPTJF | \n", + "I'm kinda over every ep of <user> being \"rippe... | \n", + "
3 | \n", + "01HOEQJW | \n", + "How many innocent children die for lack of vac... | \n", + "
4 | \n", + "01JUKMAO | \n", + "CDC eyeing bird flu vaccine for humans, though... | \n", + "
\n", + " | tweet_id | \n", + "tweets | \n", + "
---|---|---|
0 | \n", + "00BHHHP1 | \n", + "<user> <user> ... & 4 a vaccine given 2 he... | \n", + "
1 | \n", + "00UNMD0E | \n", + "Students starting school without whooping coug... | \n", + "
2 | \n", + "01AXPTJF | \n", + "I'm kinda over every ep of <user> being \"rippe... | \n", + "
3 | \n", + "01HOEQJW | \n", + "How many innocent children die for lack of vac... | \n", + "
4 | \n", + "01JUKMAO | \n", + "CDC eyeing bird flu vaccine for humans, though... | \n", + "
5 | \n", + "01V1X8XW | \n", + "I think that active duty soldiers should get v... | \n", + "
\n", + " | tweet_id | \n", + "tweets | \n", + "label | \n", + "agreement | \n", + "
---|---|---|---|---|
0 | \n", + "CL1KWCMY | \n", + "Me & The Big Homie meanboy3000 #MEANBOY #M... | \n", + "0.0 | \n", + "1.000000 | \n", + "
1 | \n", + "E3303EME | \n", + "I'm 100% thinking of devoting my career to pro... | \n", + "1.0 | \n", + "1.000000 | \n", + "
2 | \n", + "M4IVFSMS | \n", + "#whatcausesautism VACCINES, DO NOT VACCINATE Y... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
3 | \n", + "1DR6ROZ4 | \n", + "I mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
4 | \n", + "J77ENIIE | \n", + "Thanks to <user> Catch me performing at La Nui... | \n", + "0.0 | \n", + "1.000000 | \n", + "
5 | \n", + "OVNPOAUX | \n", + "<user> a nearly 67 year old study when mental ... | \n", + "1.0 | \n", + "0.666667 | \n", + "
\n", + " | label | \n", + "agreement | \n", + "
---|---|---|
count | \n", + "10000.000000 | \n", + "9999.000000 | \n", + "
mean | \n", + "0.301567 | \n", + "0.854252 | \n", + "
std | \n", + "0.646718 | \n", + "0.180707 | \n", + "
min | \n", + "-1.000000 | \n", + "0.333333 | \n", + "
25% | \n", + "0.000000 | \n", + "0.666667 | \n", + "
50% | \n", + "0.000000 | \n", + "1.000000 | \n", + "
75% | \n", + "1.000000 | \n", + "1.000000 | \n", + "
max | \n", + "1.000000 | \n", + "1.000000 | \n", + "
\n", + " | label | \n", + "agreement | \n", + "
---|---|---|
count | \n", + "10000.000000 | \n", + "9999.000000 | \n", + "
mean | \n", + "0.301567 | \n", + "0.854252 | \n", + "
std | \n", + "0.646718 | \n", + "0.180707 | \n", + "
min | \n", + "-1.000000 | \n", + "0.333333 | \n", + "
25% | \n", + "0.000000 | \n", + "0.666667 | \n", + "
50% | \n", + "0.000000 | \n", + "1.000000 | \n", + "
75% | \n", + "1.000000 | \n", + "1.000000 | \n", + "
max | \n", + "1.000000 | \n", + "1.000000 | \n", + "
\n", + " | tweet_id | \n", + "tweets | \n", + "label | \n", + "agreement | \n", + "
---|---|---|---|---|
4798 | \n", + "RQMQ0L2A | \n", + "#lawandorderSVU | \n", + "NaN | \n", + "NaN | \n", + "
4799 | \n", + "I cannot believe in this day and age some pare... | \n", + "1 | \n", + "0.666667 | \n", + "NaN | \n", + "
\n", + " | tweet_id | \n", + "tweets | \n", + "label | \n", + "agreement | \n", + "
---|
\n", + " | tweet_id | \n", + "tweets | \n", + "
---|
\n", + " | tweet_id | \n", + "tweets | \n", + "label | \n", + "agreement | \n", + "
---|---|---|---|---|
2 | \n", + "M4IVFSMS | \n", + "#whatcausesautism VACCINES, DO NOT VACCINATE Y... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
3 | \n", + "1DR6ROZ4 | \n", + "I mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
24 | \n", + "89AB846O | \n", + "<user> #CDC lied and hid data that black boys ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
25 | \n", + "XSXFN1C8 | \n", + "<user> vaccines causing autism | \n", + "-1.0 | \n", + "1.000000 | \n", + "
35 | \n", + "6PMH7C56 | \n", + "<user> <user> Other than that, his defense is ... | \n", + "-1.0 | \n", + "0.333333 | \n", + "
47 | \n", + "G1CJ54KD | \n", + "<user> I'm not vaccinating my kids lol | \n", + "-1.0 | \n", + "1.000000 | \n", + "
58 | \n", + "Y3OMTB1Q | \n", + "<user> yeah. I'll just stick to my regular vac... | \n", + "-1.0 | \n", + "0.666667 | \n", + "
62 | \n", + "ZCOLETM5 | \n", + "CIA: No more vaccination campaigns in spy ops ... | \n", + "-1.0 | \n", + "0.666667 | \n", + "
65 | \n", + "E88B1XQJ | \n", + "Vaccine Brain Damage Cover Up Implodes: <url> ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
89 | \n", + "43MWGI00 | \n", + "Centers for Disease Control: This Year’s Flu V... | \n", + "-1.0 | \n", + "0.666667 | \n", + "
90 | \n", + "TP0MIEXK | \n", + "<user> TY. Fought hard 2 NOT vaccinate my kids... | \n", + "-1.0 | \n", + "0.666667 | \n", + "
100 | \n", + "85B8L54L | \n", + "\"<user> Conservative Neurosurgeon Ben Carson S... | \n", + "-1.0 | \n", + "0.666667 | \n", + "
105 | \n", + "1TI13L1W | \n", + "<user> ok what's good u have to say about poli... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
108 | \n", + "DZWTVPSH | \n", + "Who wants a shot of autism juice...I mean meas... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
118 | \n", + "NSZDXB2J | \n", + "2/3 ...yet the only way to immunize him is to ... | \n", + "-1.0 | \n", + "0.666667 | \n", + "
120 | \n", + "L2TIWPQD | \n", + "NEW: Bexar Co. District Attorney Nico LaHood: ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
128 | \n", + "DOZBHCZ5 | \n", + "<user> <user> and the vaccine will injure tens... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
133 | \n", + "4KEP2GOM | \n", + "I honestly don't believe in immunization. \\r\\n... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
135 | \n", + "C9QSY5LE | \n", + "Really? MMR Shots? | \n", + "-1.0 | \n", + "1.000000 | \n", + "
138 | \n", + "OA5RF3H5 | \n", + "Pro safety doesn't make me an anti-vaxer. It ... | \n", + "-1.0 | \n", + "0.333333 | \n", + "
\n", + " | tweet_id | \n", + "tweets | \n", + "label | \n", + "agreement | \n", + "tweet_length | \n", + "
---|---|---|---|---|---|
0 | \n", + "CL1KWCMY | \n", + "Me & The Big Homie meanboy3000 #MEANBOY #M... | \n", + "0.0 | \n", + "1.000000 | \n", + "15 | \n", + "
1 | \n", + "E3303EME | \n", + "I'm 100% thinking of devoting my career to pro... | \n", + "1.0 | \n", + "1.000000 | \n", + "25 | \n", + "
2 | \n", + "M4IVFSMS | \n", + "#whatcausesautism VACCINES, DO NOT VACCINATE Y... | \n", + "-1.0 | \n", + "1.000000 | \n", + "7 | \n", + "
3 | \n", + "1DR6ROZ4 | \n", + "I mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "28 | \n", + "
4 | \n", + "J77ENIIE | \n", + "Thanks to <user> Catch me performing at La Nui... | \n", + "0.0 | \n", + "1.000000 | \n", + "20 | \n", + "
5 | \n", + "OVNPOAUX | \n", + "<user> a nearly 67 year old study when mental ... | \n", + "1.0 | \n", + "0.666667 | \n", + "22 | \n", + "
\n", + " | tweets | \n", + "label | \n", + "agreement | \n", + "tweet_length | \n", + "
---|---|---|---|---|
0 | \n", + "Me & The Big Homie meanboy3000 #MEANBOY #M... | \n", + "0.0 | \n", + "1.0 | \n", + "15 | \n", + "
1 | \n", + "I'm 100% thinking of devoting my career to pro... | \n", + "1.0 | \n", + "1.0 | \n", + "25 | \n", + "
2 | \n", + "#whatcausesautism VACCINES, DO NOT VACCINATE Y... | \n", + "-1.0 | \n", + "1.0 | \n", + "7 | \n", + "
3 | \n", + "I mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.0 | \n", + "28 | \n", + "
4 | \n", + "Thanks to <user> Catch me performing at La Nui... | \n", + "0.0 | \n", + "1.0 | \n", + "20 | \n", + "
\n", + " | tweets | \n", + "
---|---|
0 | \n", + "<user> <user> ... & 4 a vaccine given 2 he... | \n", + "
1 | \n", + "Students starting school without whooping coug... | \n", + "
2 | \n", + "I'm kinda over every ep of <user> being \"rippe... | \n", + "
3 | \n", + "How many innocent children die for lack of vac... | \n", + "
4 | \n", + "CDC eyeing bird flu vaccine for humans, though... | \n", + "
\n", + " | tweets | \n", + "label | \n", + "agreement | \n", + "tweet_length | \n", + "clean_tweet | \n", + "
---|---|---|---|---|---|
0 | \n", + "me & the big homie meanboy3000 #meanboy #m... | \n", + "0.0 | \n", + "1.000000 | \n", + "15 | \n", + "me amp the big homie meanboy3000 stegman... | \n", + "
1 | \n", + "i'm 100% thinking of devoting my career to pro... | \n", + "1.0 | \n", + "1.000000 | \n", + "25 | \n", + "im 100 thinking of devoting my career to provi... | \n", + "
2 | \n", + "#whatcausesautism vaccines, do not vaccinate y... | \n", + "-1.0 | \n", + "1.000000 | \n", + "7 | \n", + "vaccines do not vaccinate your child | \n", + "
3 | \n", + "i mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "28 | \n", + "i mean if they immunize my kid with something ... | \n", + "
4 | \n", + "thanks to <user> catch me performing at la nui... | \n", + "0.0 | \n", + "1.000000 | \n", + "20 | \n", + "thanks to user catch me performing at la nuit ... | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
96 | \n", + "“<user> people who complain live longer. relea... | \n", + "0.0 | \n", + "1.000000 | \n", + "19 | \n", + "user people who complain live longer releasing... | \n", + "
97 | \n", + "austerity is not a vaccine to crisis... it is ... | \n", + "0.0 | \n", + "1.000000 | \n", + "20 | \n", + "austerity is not a vaccine to crisis it is a p... | \n", + "
98 | \n", + "“<user> compensation for autism brain damage a... | \n", + "0.0 | \n", + "0.666667 | \n", + "15 | \n", + "user compensation for autism brain damage and ... | \n", + "
99 | \n", + "is it that i seek to bear w/ them? is it that ... | \n", + "0.0 | \n", + "0.666667 | \n", + "24 | \n", + "is it that i seek to bear w them is it that i ... | \n", + "
100 | \n", + "\"<user> conservative neurosurgeon ben carson s... | \n", + "-1.0 | \n", + "0.666667 | \n", + "20 | \n", + "user conservative neurosurgeon ben carson says... | \n", + "
101 rows × 5 columns
\n", + "\n", + " | tweets | \n", + "clean_tweet | \n", + "
---|---|---|
10 | \n", + "<user> : i have built up immunity to those di... | \n", + "user i have built up immunity to those disea... | \n", + "
11 | \n", + "<user> <user> <user> study of 1.3 million kids... | \n", + "user user user study of 13 million kids reveal... | \n", + "
12 | \n", + "vaccines :-0 (@ cherokee county health departm... | \n", + "vaccines 0 cherokee county health department url | \n", + "
13 | \n", + "<user> are you sure you want to come back to a... | \n", + "user are you sure you want to come back to a m... | \n", + "
14 | \n", + "oh well <user> an 18-month-old who had not bee... | \n", + "oh well user an 18monthold who had not been va... | \n", + "
15 | \n", + "kcmo health depart:if you want a nasal flu vac... | \n", + "kcmo health departif you want a nasal flu vacc... | \n", + "
16 | \n", + "a stipulation on jay's contract should be that... | \n", + "a stipulation on jays contract should be that ... | \n", + "
17 | \n", + "if you do not vaccinate your children, let me ... | \n", + "if you do not vaccinate your children let me k... | \n", + "
18 | \n", + "currently at the health department waiting for... | \n", + "currently at the health department waiting for... | \n", + "
19 | \n", + "<user> <user> <user> <user> <user> and again, ... | \n", + "user user user user user and again they do hav... | \n", + "
20 | \n", + "disney parks-linked measles outbreak grows to ... | \n", + "disney parkslinked measles outbreak grows to 7... | \n", + "
21 | \n", + "#hatemondays#fml#immunization#health#mmr#vacci... | \n", + "nyc department of education url | \n", + "
22 | \n", + "this made me think of you. <user> “<user> seat... | \n", + "this made me think of you user user seattle ki... | \n", + "
23 | \n", + "this is how infectious diseases start, thx for... | \n", + "this is how infectious diseases start thx for ... | \n", + "
24 | \n", + "8 out of 12 patents admitted children develope... | \n", + "8 out of 12 patents admitted children develope... | \n", + "
25 | \n", + "almighty jesus, nobody gave me nothing but the... | \n", + "almighty jesus nobody gave me nothing but the ... | \n", + "
\n", + " | tweets | \n", + "label | \n", + "agreement | \n", + "tweet_length | \n", + "clean_tweet | \n", + "
---|---|---|---|---|---|
10 | \n", + "<user> @ this point i have 2 text, butw/bon jo... | \n", + "0.0 | \n", + "1.000000 | \n", + "25 | \n", + "user point 2 text butwbon jovi cover playin al... | \n", + "
11 | \n", + "my prediction, vaccine exemption in arizona wi... | \n", + "0.0 | \n", + "0.666667 | \n", + "18 | \n", + "prediction vaccine exemption arizona will end ... | \n", + "
12 | \n", + "getting my vaccines ! #china #nervous #moving ... | \n", + "1.0 | \n", + "1.000000 | \n", + "16 | \n", + "getting vaccines cheryl southern nevada health... | \n", + "
13 | \n", + "1$mug noche <user> #mmr #mixmasterrod #dcdj #m... | \n", + "0.0 | \n", + "1.000000 | \n", + "13 | \n", + "1mug noche user mad hatter url | \n", + "
14 | \n", + "got my influenza vaccine! (@ purdue university... | \n", + "1.0 | \n", + "0.666667 | \n", + "13 | \n", + "got influenza vaccine purdue university studen... | \n", + "
15 | \n", + "sb121 [enroll] meningococcal disease-pupils to... | \n", + "0.0 | \n", + "0.666667 | \n", + "12 | \n", + "sb121 enroll meningococcal diseasepupils immun... | \n", + "
16 | \n", + "increasing number of parents skip vaccinations... | \n", + "0.0 | \n", + "1.000000 | \n", + "12 | \n", + "increasing number parents skip vaccinations ch... | \n", + "
17 | \n", + "<user> thank you for standing with ca parents ... | \n", + "1.0 | \n", + "1.000000 | \n", + "16 | \n", + "user thank standing ca parents children suppor... | \n", + "
18 | \n", + "dude idc if disney land has the measles, that ... | \n", + "0.0 | \n", + "1.000000 | \n", + "20 | \n", + "dude idc disney land measles means shorter lin... | \n", + "
19 | \n", + "beeftalk: start your calf vaccinations now <ur... | \n", + "1.0 | \n", + "1.000000 | \n", + "14 | \n", + "beeftalk start calf vaccinations now url via u... | \n", + "
20 | \n", + "i don't care what <user> says, you should prob... | \n", + "1.0 | \n", + "1.000000 | \n", + "13 | \n", + "dont care user says probably kids vaccinated | \n", + "
\n", + " | tweets | \n", + "clean_tweet | \n", + "
---|---|---|
900 | \n", + "i wanna dip myself in a vat of purell after th... | \n", + "wanna dip vat purell doctors officethey safe r... | \n", + "
901 | \n", + "division of public health launches community i... | \n", + "division public health launches community immu... | \n", + "
902 | \n", + "she's a puss. haha scared of her vaccines. poo... | \n", + "shes puss haha scared vaccines poor baby spruc... | \n", + "
903 | \n", + "researcher says cdc 'chose to cover up' data l... | \n", + "researcher says cdc chose cover data linking a... | \n", + "
904 | \n", + "disney measles outbreak could get worse, exper... | \n", + "disney measles outbreak worse experts warn url | \n", + "
... | \n", + "... | \n", + "... | \n", + "
996 | \n", + "free back-to-school immunizations <url> <url> | \n", + "free backtoschool immunizations url url | \n", + "
997 | \n", + "lesson of the day: vaccinate your children or ... | \n", + "lesson day vaccinate children disney will kill... | \n", + "
998 | \n", + "<user> i have read some articles about the vac... | \n", + "user read articles vaccine say high incident p... | \n", + "
999 | \n", + "<user> #gop stop blaming #immigrants first #eb... | \n", + "user stop blaming first now diseasesbrought an... | \n", + "
1000 | \n", + "mmr shots hurt like a little b | \n", + "mmr shots hurt little b | \n", + "
101 rows × 2 columns
\n", + "\n", + " | tweets | \n", + "label | \n", + "agreement | \n", + "tweet_length | \n", + "clean_tweet | \n", + "
---|---|---|---|---|---|
0 | \n", + "me & the big homie meanboy3000 #meanboy #m... | \n", + "0.0 | \n", + "1.0 | \n", + "15 | \n", + "amp big homie meanboy stegman st | \n", + "
1 | \n", + "i'm 100% thinking of devoting my career to pro... | \n", + "1.0 | \n", + "1.0 | \n", + "25 | \n", + "im thinking devoting career proving autism isn... | \n", + "
2 | \n", + "#whatcausesautism vaccines, do not vaccinate y... | \n", + "-1.0 | \n", + "1.0 | \n", + "7 | \n", + "vaccines vaccinate child | \n", + "
3 | \n", + "i mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.0 | \n", + "28 | \n", + "mean immunize kid something wont secretly kill... | \n", + "
4 | \n", + "thanks to <user> catch me performing at la nui... | \n", + "0.0 | \n", + "1.0 | \n", + "20 | \n", + "thanks catch performing la nuit nyc st ave sho... | \n", + "
\n", + " | word | \n", + "count | \n", + "
---|---|---|
0 | \n", + "measles | \n", + "3177 | \n", + "
1 | \n", + "vaccine | \n", + "1469 | \n", + "
2 | \n", + "kids | \n", + "1260 | \n", + "
3 | \n", + "vaccines | \n", + "1190 | \n", + "
4 | \n", + "health | \n", + "1066 | \n", + "
5 | \n", + "vaccinate | \n", + "905 | \n", + "
6 | \n", + "children | \n", + "831 | \n", + "
7 | \n", + "people | \n", + "702 | \n", + "
8 | \n", + "dont | \n", + "677 | \n", + "
9 | \n", + "mmr | \n", + "619 | \n", + "
10 | \n", + "vaccinated | \n", + "601 | \n", + "
11 | \n", + "outbreak | \n", + "590 | \n", + "
12 | \n", + "autism | \n", + "589 | \n", + "
13 | \n", + "immunity | \n", + "548 | \n", + "
14 | \n", + "amp | \n", + "535 | \n", + "
15 | \n", + "parents | \n", + "517 | \n", + "
16 | \n", + "vaccinations | \n", + "505 | \n", + "
17 | \n", + "child | \n", + "465 | \n", + "
18 | \n", + "school | \n", + "434 | \n", + "
19 | \n", + "vaccination | \n", + "430 | \n", + "
\n", + " | tweets | \n", + "label | \n", + "agreement | \n", + "tweet_length | \n", + "clean_tweet | \n", + "
---|---|---|---|---|---|
0 | \n", + "me & the big homie meanboy3000 #meanboy #m... | \n", + "0.0 | \n", + "1.000000 | \n", + "15 | \n", + "amp big homie meanboy stegman st | \n", + "
1 | \n", + "i'm 100% thinking of devoting my career to pro... | \n", + "1.0 | \n", + "1.000000 | \n", + "25 | \n", + "im thinking devoting career proving autism isn... | \n", + "
2 | \n", + "#whatcausesautism vaccines, do not vaccinate y... | \n", + "-1.0 | \n", + "1.000000 | \n", + "7 | \n", + "vaccines vaccinate child | \n", + "
3 | \n", + "i mean if they immunize my kid with something ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "28 | \n", + "mean immunize kid something wont secretly kill... | \n", + "
4 | \n", + "thanks to <user> catch me performing at la nui... | \n", + "0.0 | \n", + "1.000000 | \n", + "20 | \n", + "thanks catch performing la nuit nyc st ave sho... | \n", + "
5 | \n", + "<user> a nearly 67 year old study when mental ... | \n", + "1.0 | \n", + "0.666667 | \n", + "22 | \n", + "nearly year old study mental health studies va... | \n", + "
6 | \n", + "study of more than 95,000 kids finds no link b... | \n", + "1.0 | \n", + "0.666667 | \n", + "15 | \n", + "study kids finds link mmr vaccine autism | \n", + "
7 | \n", + "psa: vaccinate your fucking kids | \n", + "1.0 | \n", + "1.000000 | \n", + "5 | \n", + "psa vaccinate fucking kids | \n", + "
8 | \n", + "coughing extra on the shuttle and everyone thi... | \n", + "1.0 | \n", + "0.666667 | \n", + "14 | \n", + "coughing extra shuttle everyone thinks measles | \n", + "
9 | \n", + "aids vaccine created at oregon health & sc... | \n", + "1.0 | \n", + "0.666667 | \n", + "17 | \n", + "aids vaccine created oregon health amp science... | \n", + "
10 | \n", + "<user> @ this point i have 2 text, butw/bon jo... | \n", + "0.0 | \n", + "1.000000 | \n", + "25 | \n", + "point text butwbon jovi cover playin alibis ho... | \n", + "
11 | \n", + "my prediction, vaccine exemption in arizona wi... | \n", + "0.0 | \n", + "0.666667 | \n", + "18 | \n", + "prediction vaccine exemption arizona will end ... | \n", + "
12 | \n", + "getting my vaccines ! #china #nervous #moving ... | \n", + "1.0 | \n", + "1.000000 | \n", + "16 | \n", + "getting vaccines cheryl southern nevada health... | \n", + "
13 | \n", + "1$mug noche <user> #mmr #mixmasterrod #dcdj #m... | \n", + "0.0 | \n", + "1.000000 | \n", + "13 | \n", + "mug noche mad hatter | \n", + "
14 | \n", + "got my influenza vaccine! (@ purdue university... | \n", + "1.0 | \n", + "0.666667 | \n", + "13 | \n", + "got influenza vaccine purdue university studen... | \n", + "
15 | \n", + "sb121 [enroll] meningococcal disease-pupils to... | \n", + "0.0 | \n", + "0.666667 | \n", + "12 | \n", + "sb enroll meningococcal diseasepupils immunize... | \n", + "
16 | \n", + "increasing number of parents skip vaccinations... | \n", + "0.0 | \n", + "1.000000 | \n", + "12 | \n", + "increasing number parents skip vaccinations ch... | \n", + "
17 | \n", + "<user> thank you for standing with ca parents ... | \n", + "1.0 | \n", + "1.000000 | \n", + "16 | \n", + "thank standing ca parents children support | \n", + "
18 | \n", + "dude idc if disney land has the measles, that ... | \n", + "0.0 | \n", + "1.000000 | \n", + "20 | \n", + "dude idc disney land measles means shorter lin... | \n", + "
19 | \n", + "beeftalk: start your calf vaccinations now <ur... | \n", + "1.0 | \n", + "1.000000 | \n", + "14 | \n", + "beeftalk start calf vaccinations now via good ... | \n", + "
20 | \n", + "i don't care what <user> says, you should prob... | \n", + "1.0 | \n", + "1.000000 | \n", + "13 | \n", + "dont care says probably kids vaccinated | \n", + "
21 | \n", + "#acr13 small study shows in 10 pts with lupus ... | \n", + "1.0 | \n", + "0.666667 | \n", + "20 | \n", + "small study shows pts lupus shingles vaccine s... | \n", + "
22 | \n", + "cdc: measles epidemic poses travel risks usat.... | \n", + "0.0 | \n", + "1.000000 | \n", + "19 | \n", + "cdc measles epidemic poses travel risks usatly... | \n", + "
23 | \n", + "every time i see the \"to vaccinate or not\" deb... | \n", + "1.0 | \n", + "1.000000 | \n", + "24 | \n", + "every time see vaccinate debate wonder one sid... | \n", + "
24 | \n", + "<user> #cdc lied and hid data that black boys ... | \n", + "-1.0 | \n", + "1.000000 | \n", + "21 | \n", + "lied hid data black boys uncreased risk develo... | \n", + "
25 | \n", + "<user> vaccines causing autism | \n", + "-1.0 | \n", + "1.000000 | \n", + "4 | \n", + "vaccines causing autism | \n", + "
26 | \n", + "“<user> i rarely see arguments about over vacc... | \n", + "0.0 | \n", + "0.666667 | \n", + "14 | \n", + "rarely see arguments vaccination actually big ... | \n", + "
27 | \n", + "i'm not obsessed w ebola, just following an ou... | \n", + "1.0 | \n", + "0.666667 | \n", + "25 | \n", + "im obsessed w ebola following outbreak worlds ... | \n", + "
28 | \n", + "<user> joshthenewt i suck at the game, haha we... | \n", + "0.0 | \n", + "1.000000 | \n", + "19 | \n", + "joshthenewt suck game haha well people say don... | \n", + "
29 | \n", + "don't shake his hand, pocahontas! that's proba... | \n", + "0.0 | \n", + "0.666667 | \n", + "13 | \n", + "dont shake hand pocahontas thats probably meas... | \n", + "
30 | \n", + "<user> yes. i'm a part of public health just l... | \n", + "1.0 | \n", + "1.000000 | \n", + "26 | \n", + "yes im part public health im part nature feed ... | \n", + "
31 | \n", + "new studies show that vaccines are not associa... | \n", + "1.0 | \n", + "1.000000 | \n", + "17 | \n", + "new studies show vaccines associated autism ne... | \n", + "
32 | \n", + "not to be repetitive, but i could not be less ... | \n", + "1.0 | \n", + "1.000000 | \n", + "22 | \n", + "repetitive less shocked increase asd even gene... | \n", + "
33 | \n", + "glad i got vaccinated! “<user> health alert: a... | \n", + "1.0 | \n", + "1.000000 | \n", + "18 | \n", + "glad got vaccinated health alert case meningit... | \n", + "
34 | \n", + "you look like you got the measles | \n", + "0.0 | \n", + "0.666667 | \n", + "7 | \n", + "look got measles | \n", + "
35 | \n", + "<user> <user> other than that, his defense is ... | \n", + "-1.0 | \n", + "0.333333 | \n", + "20 | \n", + "defense vaccines harmful american life unhealthy | \n", + "
36 | \n", + "like hello ranked reset they probably did bad ... | \n", + "0.0 | \n", + "1.000000 | \n", + "21 | \n", + "hello ranked reset probably bad placements mmr... | \n", + "
37 | \n", + "amid measles outbreak, vaccines for teachers a... | \n", + "0.0 | \n", + "0.666667 | \n", + "18 | \n", + "amid measles outbreak vaccines teachers arent ... | \n", + "
38 | \n", + "<user> <user> <user> <user> “<user> <url> meas... | \n", + "0.0 | \n", + "0.666667 | \n", + "8 | \n", + "measles threat | \n", + "
39 | \n", + "improve mood, energy, immunity, cardio health.... | \n", + "0.0 | \n", + "1.000000 | \n", + "15 | \n", + "improve mood energy immunity cardio health kit... | \n", + "
40 | \n", + "mt <user> new #vaccination bill would end exem... | \n", + "0.0 | \n", + "1.000000 | \n", + "15 | \n", + "mt new bill end exemptions personal religious via | \n", + "
41 | \n", + "autism and immunizations: should you vaccinate... | \n", + "0.0 | \n", + "0.666667 | \n", + "7 | \n", + "autism immunizations vaccinate | \n", + "
42 | \n", + "bart riders warned about measles infection fro... | \n", + "0.0 | \n", + "1.000000 | \n", + "18 | \n", + "bart riders warned measles infection contagiou... | \n", + "
43 | \n", + ".<user> u.s. #measles cases hit 15-year high. ... | \n", + "0.0 | \n", + "0.666667 | \n", + "22 | \n", + "us cases hit year high far year cases measles ... | \n", + "
44 | \n", + "pull up myxx nightlife.... stint t performing ... | \n", + "0.0 | \n", + "1.000000 | \n", + "15 | \n", + "pull myxx nightlife stint t performing live ho... | \n", + "
45 | \n", + "cdc eyeing bird flu vaccine for humans, though... | \n", + "0.0 | \n", + "0.666667 | \n", + "19 | \n", + "cdc eyeing bird flu vaccine humans though risk... | \n", + "
46 | \n", + "involved in fight against #measles and other #... | \n", + "1.0 | \n", + "0.666667 | \n", + "16 | \n", + "involved fight preventable diseases tenure tra... | \n", + "
47 | \n", + "<user> i'm not vaccinating my kids lol | \n", + "-1.0 | \n", + "1.000000 | \n", + "7 | \n", + "im vaccinating kids lol | \n", + "
48 | \n", + "alleged victim reviewing immunity paperwork so... | \n", + "0.0 | \n", + "1.000000 | \n", + "15 | \n", + "alleged victim reviewing immunity paperwork re... | \n", + "
49 | \n", + "thanks<user> for being more crystal clear abou... | \n", + "1.0 | \n", + "0.666667 | \n", + "20 | \n", + "thanksuser crystal clear lack evidence linking... | \n", + "
50 | \n", + "<user> giving me dvds on how vaccinating child... | \n", + "0.0 | \n", + "0.666667 | \n", + "10 | \n", + "giving dvds vaccinating children bad | \n", + "
51 | \n", + "hey now...<user> #mixmasterrod #follow #madhat... | \n", + "0.0 | \n", + "1.000000 | \n", + "12 | \n", + "hey nowuser mixmasterrods upstairs lounge | \n", + "
52 | \n", + "i seriously don't want to have a child until t... | \n", + "1.0 | \n", + "1.000000 | \n", + "16 | \n", + "seriously dont want child antivaccination move... | \n", + "
53 | \n", + "<user> <user> <user> epidemic of enterovirus;... | \n", + "0.0 | \n", + "1.000000 | \n", + "17 | \n", + "epidemic enterovirus mumps now measles drug re... | \n", + "
54 | \n", + "they said i wasn't gone be shit, lol looked li... | \n", + "0.0 | \n", + "1.000000 | \n", + "22 | \n", + "said wasnt gone shit lol looked proved wrong baby | \n", + "
55 | \n", + "flu shots at school boost vaccination rates: o... | \n", + "0.0 | \n", + "0.666667 | \n", + "19 | \n", + "flu shots school boost vaccination rates offer... | \n", + "
56 | \n", + "“<user> 1) marin county school board sides wit... | \n", + "0.0 | \n", + "0.666667 | \n", + "15 | \n", + "marin county school board sides young leukemia... | \n", + "
57 | \n", + "\"still running with the same niggas til the de... | \n", + "0.0 | \n", + "1.000000 | \n", + "16 | \n", + "still running niggas til death dibiasimb | \n", + "
\n", + " | clean_tweet | \n", + "label | \n", + "agreement | \n", + "
---|---|---|---|
0 | \n", + "amp big homie meanboy stegman st | \n", + "0.0 | \n", + "1.000000 | \n", + "
1 | \n", + "im thinking devoting career proving autism isn... | \n", + "1.0 | \n", + "1.000000 | \n", + "
2 | \n", + "vaccines vaccinate child | \n", + "-1.0 | \n", + "1.000000 | \n", + "
3 | \n", + "mean immunize kid something wont secretly kill... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
4 | \n", + "thanks catch performing la nuit nyc st ave sho... | \n", + "0.0 | \n", + "1.000000 | \n", + "
5 | \n", + "nearly year old study mental health studies va... | \n", + "1.0 | \n", + "0.666667 | \n", + "
6 | \n", + "study kids finds link mmr vaccine autism | \n", + "1.0 | \n", + "0.666667 | \n", + "
7 | \n", + "psa vaccinate fucking kids | \n", + "1.0 | \n", + "1.000000 | \n", + "
8 | \n", + "coughing extra shuttle everyone thinks measles | \n", + "1.0 | \n", + "0.666667 | \n", + "
9 | \n", + "aids vaccine created oregon health amp science... | \n", + "1.0 | \n", + "0.666667 | \n", + "
10 | \n", + "point text butwbon jovi cover playin alibis ho... | \n", + "0.0 | \n", + "1.000000 | \n", + "
\n", + " | clean_tweet | \n", + "label | \n", + "agreement | \n", + "
---|---|---|---|
0 | \n", + "amp big homie meanboy stegman st | \n", + "0.0 | \n", + "1.000000 | \n", + "
1 | \n", + "im thinking devoting career proving autism isn... | \n", + "1.0 | \n", + "1.000000 | \n", + "
2 | \n", + "vaccine vaccinate child | \n", + "-1.0 | \n", + "1.000000 | \n", + "
3 | \n", + "mean immunize kid something wont secretly kill... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
4 | \n", + "thanks catch performing la nuit nyc st ave sho... | \n", + "0.0 | \n", + "1.000000 | \n", + "
5 | \n", + "nearly year old study mental health study vacc... | \n", + "1.0 | \n", + "0.666667 | \n", + "
6 | \n", + "study kid find link mmr vaccine autism | \n", + "1.0 | \n", + "0.666667 | \n", + "
7 | \n", + "psa vaccinate fucking kid | \n", + "1.0 | \n", + "1.000000 | \n", + "
8 | \n", + "coughing extra shuttle everyone think measles | \n", + "1.0 | \n", + "0.666667 | \n", + "
9 | \n", + "aid vaccine created oregon health amp science ... | \n", + "1.0 | \n", + "0.666667 | \n", + "
10 | \n", + "point text butwbon jovi cover playin alibi hop... | \n", + "0.0 | \n", + "1.000000 | \n", + "
11 | \n", + "prediction vaccine exemption arizona will end ... | \n", + "0.0 | \n", + "0.666667 | \n", + "
12 | \n", + "getting vaccine cheryl southern nevada health ... | \n", + "1.0 | \n", + "1.000000 | \n", + "
13 | \n", + "mug noche mad hatter | \n", + "0.0 | \n", + "1.000000 | \n", + "
14 | \n", + "got influenza vaccine purdue university studen... | \n", + "1.0 | \n", + "0.666667 | \n", + "
15 | \n", + "sb enroll meningococcal diseasepupils immunize... | \n", + "0.0 | \n", + "0.666667 | \n", + "
16 | \n", + "increasing number parent skip vaccination chil... | \n", + "0.0 | \n", + "1.000000 | \n", + "
17 | \n", + "thank standing ca parent child support | \n", + "1.0 | \n", + "1.000000 | \n", + "
18 | \n", + "dude idc disney land measles mean shorter line... | \n", + "0.0 | \n", + "1.000000 | \n", + "
19 | \n", + "beeftalk start calf vaccination now via good a... | \n", + "1.0 | \n", + "1.000000 | \n", + "
20 | \n", + "dont care say probably kid vaccinated | \n", + "1.0 | \n", + "1.000000 | \n", + "
21 | \n", + "small study show pt lupus shingle vaccine safe... | \n", + "1.0 | \n", + "0.666667 | \n", + "
22 | \n", + "cdc measles epidemic pose travel risk usatlyix... | \n", + "0.0 | \n", + "1.000000 | \n", + "
23 | \n", + "every time see vaccinate debate wonder one sid... | \n", + "1.0 | \n", + "1.000000 | \n", + "
24 | \n", + "lied hid data black boy uncreased risk develop... | \n", + "-1.0 | \n", + "1.000000 | \n", + "
25 | \n", + "vaccine causing autism | \n", + "-1.0 | \n", + "1.000000 | \n", + "
26 | \n", + "rarely see argument vaccination actually big deal | \n", + "0.0 | \n", + "0.666667 | \n", + "
27 | \n", + "im obsessed w ebola following outbreak world d... | \n", + "1.0 | \n", + "0.666667 | \n", + "
28 | \n", + "joshthenewt suck game haha well people say don... | \n", + "0.0 | \n", + "1.000000 | \n", + "
29 | \n", + "dont shake hand pocahontas thats probably meas... | \n", + "0.0 | \n", + "0.666667 | \n", + "
30 | \n", + "yes im part public health im part nature feed ... | \n", + "1.0 | \n", + "1.000000 | \n", + "
31 | \n", + "new study show vaccine associated autism news ... | \n", + "1.0 | \n", + "1.000000 | \n", + "
32 | \n", + "repetitive le shocked increase asd even geneti... | \n", + "1.0 | \n", + "1.000000 | \n", + "
33 | \n", + "glad got vaccinated health alert case meningit... | \n", + "1.0 | \n", + "1.000000 | \n", + "
34 | \n", + "look got measles | \n", + "0.0 | \n", + "0.666667 | \n", + "
35 | \n", + "defense vaccine harmful american life unhealthy | \n", + "-1.0 | \n", + "0.333333 | \n", + "
36 | \n", + "hello ranked reset probably bad placement mmr ... | \n", + "0.0 | \n", + "1.000000 | \n", + "
37 | \n", + "amid measles outbreak vaccine teacher arent re... | \n", + "0.0 | \n", + "0.666667 | \n", + "
38 | \n", + "measles threat | \n", + "0.0 | \n", + "0.666667 | \n", + "
39 | \n", + "improve mood energy immunity cardio health kit... | \n", + "0.0 | \n", + "1.000000 | \n", + "
40 | \n", + "mt new bill end exemption personal religious via | \n", + "0.0 | \n", + "1.000000 | \n", + "
41 | \n", + "autism immunization vaccinate | \n", + "0.0 | \n", + "0.666667 | \n", + "
42 | \n", + "bart rider warned measles infection contagious... | \n", + "0.0 | \n", + "1.000000 | \n", + "
43 | \n", + "u case hit year high far year case measles rep... | \n", + "0.0 | \n", + "0.666667 | \n", + "
44 | \n", + "pull myxx nightlife stint t performing live ho... | \n", + "0.0 | \n", + "1.000000 | \n", + "
45 | \n", + "cdc eyeing bird flu vaccine human though risk ... | \n", + "0.0 | \n", + "0.666667 | \n", + "
46 | \n", + "involved fight preventable disease tenure trac... | \n", + "1.0 | \n", + "0.666667 | \n", + "
47 | \n", + "im vaccinating kid lol | \n", + "-1.0 | \n", + "1.000000 | \n", + "
48 | \n", + "alleged victim reviewing immunity paperwork re... | \n", + "0.0 | \n", + "1.000000 | \n", + "
49 | \n", + "thanksuser crystal clear lack evidence linking... | \n", + "1.0 | \n", + "0.666667 | \n", + "
50 | \n", + "giving dvd vaccinating child bad | \n", + "0.0 | \n", + "0.666667 | \n", + "