Fixed AQI Downloader Python Notebook

#3
by kalpshah18 - opened
Files changed (1) hide show
  1. aqi_downloader.ipynb +125 -84
aqi_downloader.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 13,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -20,7 +20,7 @@
20
  },
21
  {
22
  "cell_type": "code",
23
- "execution_count": 14,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
@@ -34,7 +34,9 @@
34
  " file_exists, file_path, file_name = check_exists(date)\n",
35
  " if file_exists:\n",
36
  " return file_path\n",
37
- " \n",
 
 
38
  " url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
39
  " response = requests.get(url)\n",
40
  " if response.status_code == 200:\n",
@@ -48,7 +50,7 @@
48
  },
49
  {
50
  "cell_type": "code",
51
- "execution_count": 15,
52
  "metadata": {},
53
  "outputs": [
54
  {
@@ -59,19 +61,19 @@
59
  " '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
60
  " '2016-01-09', '2016-01-10',\n",
61
  " ...\n",
62
- " '2024-11-02', '2024-11-03', '2024-11-04', '2024-11-05',\n",
63
- " '2024-11-06', '2024-11-07', '2024-11-08', '2024-11-09',\n",
64
- " '2024-11-10', '2024-11-11'],\n",
65
- " dtype='datetime64[ns]', length=3238, freq='D')\n"
66
  ]
67
  },
68
  {
69
  "data": {
70
  "text/plain": [
71
- "(None, 3238)"
72
  ]
73
  },
74
- "execution_count": 15,
75
  "metadata": {},
76
  "output_type": "execute_result"
77
  }
@@ -84,7 +86,7 @@
84
  },
85
  {
86
  "cell_type": "code",
87
- "execution_count": 16,
88
  "metadata": {},
89
  "outputs": [
90
  {
@@ -92,8 +94,9 @@
92
  "output_type": "stream",
93
  "text": [
94
  "Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
 
95
  "Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
96
- "Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20170618.pdf with status code 404\n"
97
  ]
98
  }
99
  ],
@@ -104,23 +107,23 @@
104
  },
105
  {
106
  "cell_type": "code",
107
- "execution_count": 17,
108
  "metadata": {},
109
  "outputs": [
110
  {
111
  "name": "stdout",
112
  "output_type": "stream",
113
  "text": [
114
- "3238\n"
115
  ]
116
  },
117
  {
118
  "data": {
119
  "text/plain": [
120
- "3235"
121
  ]
122
  },
123
- "execution_count": 17,
124
  "metadata": {},
125
  "output_type": "execute_result"
126
  }
@@ -133,35 +136,22 @@
133
  },
134
  {
135
  "cell_type": "code",
136
- "execution_count": 18,
137
  "metadata": {},
138
  "outputs": [
139
  {
140
  "data": {
141
  "application/vnd.jupyter.widget-view+json": {
142
- "model_id": "6b1ea587cbcb48f6838b51ebc5b2dfc3",
143
  "version_major": 2,
144
  "version_minor": 0
145
  },
146
  "text/plain": [
147
- " 0%| | 0/3238 [00:00<?, ?it/s]"
148
  ]
149
  },
150
  "metadata": {},
151
  "output_type": "display_data"
152
- },
153
- {
154
- "name": "stdout",
155
- "output_type": "stream",
156
- "text": [
157
- "File AQI_Bulletin_20160606.pdf does not exist\n",
158
- "No tables found in AQI_data/AQI_Bulletin_20160704.pdf\n",
159
- "No tables found in AQI_data/AQI_Bulletin_20160721.pdf\n",
160
- "No tables found in AQI_data/AQI_Bulletin_20160723.pdf\n",
161
- "No tables found in AQI_data/AQI_Bulletin_20160722.pdf\n",
162
- "File AQI_Bulletin_20170618.pdf does not exist\n",
163
- "File AQI_Bulletin_20171014.pdf does not exist\n"
164
- ]
165
  }
166
  ],
167
  "source": [
@@ -309,6 +299,11 @@
309
  " raise ValueError(\"Table pattern not recognized\")\n",
310
  "\n",
311
  "def process_file(date):\n",
 
 
 
 
 
312
  " file_exists, file_path, file_name = check_exists(date)\n",
313
  " if not file_exists:\n",
314
  " print(f\"File {file_name} does not exist\")\n",
@@ -364,6 +359,49 @@
364
  "_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
365
  ]
366
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
367
  {
368
  "cell_type": "markdown",
369
  "metadata": {},
@@ -373,16 +411,16 @@
373
  },
374
  {
375
  "cell_type": "code",
376
- "execution_count": 19,
377
  "metadata": {},
378
  "outputs": [
379
  {
380
  "data": {
381
  "text/plain": [
382
- "372342"
383
  ]
384
  },
385
- "execution_count": 19,
386
  "metadata": {},
387
  "output_type": "execute_result"
388
  }
@@ -394,14 +432,14 @@
394
  },
395
  {
396
  "cell_type": "code",
397
- "execution_count": 20,
398
  "metadata": {},
399
  "outputs": [
400
  {
401
  "name": "stdout",
402
  "output_type": "stream",
403
  "text": [
404
- "['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
405
  ]
406
  }
407
  ],
@@ -420,6 +458,7 @@
420
  " \"Manglore\": \"Mangalore\",\n",
421
  " \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
422
  " \"Tumakuru\": \"Tumidih\",\n",
 
423
  " \"Tiruppur\": \"Tirupur\",\n",
424
  " \"Yamuna Nagar\": \"Yamunanagar\",\n",
425
  " \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
@@ -438,48 +477,49 @@
438
  },
439
  {
440
  "cell_type": "code",
441
- "execution_count": 21,
442
  "metadata": {},
443
  "outputs": [
444
  {
445
  "data": {
446
  "text/plain": [
447
  "State\n",
448
- "Andhra Pradesh 10881\n",
449
- "Arunachal Pradesh 557\n",
450
- "Assam 4582\n",
451
- "Bihar 26391\n",
452
- "Chandigarh 1874\n",
453
- "Chhattisgarh 4674\n",
454
- "Delhi 3224\n",
455
- "Gujarat 11558\n",
456
- "Haryana 49090\n",
457
- "Himachal Pradesh 916\n",
458
- "Jammu and Kashmir 822\n",
459
- "Jharkhand 1872\n",
460
- "Karnataka 33054\n",
461
- "Kerala 11121\n",
462
- "Madhya Pradesh 29800\n",
463
- "Maharashtra 35954\n",
464
- "Manipur 724\n",
465
- "Meghalaya 1839\n",
466
- "Mizoram 1433\n",
467
- "Nagaland 1305\n",
468
- "Odisha 10816\n",
469
- "Puducherry 1330\n",
470
- "Punjab 18876\n",
471
- "Rajasthan 34231\n",
472
- "Sikkim 772\n",
473
- "Tamil Nadu 12170\n",
474
- "Telangana 3216\n",
475
- "Tripura 1349\n",
476
- "Uttar Pradesh 39710\n",
477
- "Uttarakhand 1866\n",
478
- "West Bengal 14674\n",
 
479
  "Name: count, dtype: int64"
480
  ]
481
  },
482
- "execution_count": 21,
483
  "metadata": {},
484
  "output_type": "execute_result"
485
  }
@@ -593,6 +633,7 @@
593
  " 'Solapur': 'Maharashtra',\n",
594
  " 'Sonipat': 'Haryana',\n",
595
  " 'Sri Ganganagar': 'Rajasthan',\n",
 
596
  " 'Srinagar': 'Jammu and Kashmir',\n",
597
  " 'Suakati': 'Odisha',\n",
598
  " 'Surat': 'Gujarat',\n",
@@ -631,7 +672,7 @@
631
  },
632
  {
633
  "cell_type": "code",
634
- "execution_count": 34,
635
  "metadata": {},
636
  "outputs": [
637
  {
@@ -694,12 +735,12 @@
694
  "0 Agra 417 PM\\n2.5 Severe \n",
695
  "1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
696
  "\n",
697
- " Based on number of monitoring stations Date State \n",
698
- "0 1 2016-01-01 Uttar Pradesh \n",
699
- "1 5 2016-01-01 Karnataka "
700
  ]
701
  },
702
- "execution_count": 34,
703
  "metadata": {},
704
  "output_type": "execute_result"
705
  }
@@ -712,16 +753,16 @@
712
  },
713
  {
714
  "cell_type": "code",
715
- "execution_count": 37,
716
  "metadata": {},
717
  "outputs": [
718
  {
719
  "data": {
720
  "text/plain": [
721
- "370681"
722
  ]
723
  },
724
- "execution_count": 37,
725
  "metadata": {},
726
  "output_type": "execute_result"
727
  }
@@ -732,15 +773,15 @@
732
  },
733
  {
734
  "cell_type": "code",
735
- "execution_count": 43,
736
  "metadata": {},
737
  "outputs": [
738
  {
739
  "name": "stdout",
740
  "output_type": "stream",
741
  "text": [
742
- "370681\n",
743
- "369967\n"
744
  ]
745
  }
746
  ],
@@ -753,7 +794,7 @@
753
  },
754
  {
755
  "cell_type": "code",
756
- "execution_count": 46,
757
  "metadata": {},
758
  "outputs": [],
759
  "source": [
@@ -763,7 +804,7 @@
763
  ],
764
  "metadata": {
765
  "kernelspec": {
766
- "display_name": "zeel_py310",
767
  "language": "python",
768
  "name": "python3"
769
  },
@@ -777,7 +818,7 @@
777
  "name": "python",
778
  "nbconvert_exporter": "python",
779
  "pygments_lexer": "ipython3",
780
- "version": "3.10.15"
781
  }
782
  },
783
  "nbformat": 4,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
20
  },
21
  {
22
  "cell_type": "code",
23
+ "execution_count": 5,
24
  "metadata": {},
25
  "outputs": [],
26
  "source": [
 
34
  " file_exists, file_path, file_name = check_exists(date)\n",
35
  " if file_exists:\n",
36
  " return file_path\n",
37
+ "\n",
38
+ " os.makedirs(\"AQI_data\", exist_ok=True)\n",
39
+ "\n",
40
  " url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
41
  " response = requests.get(url)\n",
42
  " if response.status_code == 200:\n",
 
50
  },
51
  {
52
  "cell_type": "code",
53
+ "execution_count": 6,
54
  "metadata": {},
55
  "outputs": [
56
  {
 
61
  " '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
62
  " '2016-01-09', '2016-01-10',\n",
63
  " ...\n",
64
+ " '2025-02-17', '2025-02-18', '2025-02-19', '2025-02-20',\n",
65
+ " '2025-02-21', '2025-02-22', '2025-02-23', '2025-02-24',\n",
66
+ " '2025-02-25', '2025-02-26'],\n",
67
+ " dtype='datetime64[ns]', length=3345, freq='D')\n"
68
  ]
69
  },
70
  {
71
  "data": {
72
  "text/plain": [
73
+ "(None, 3345)"
74
  ]
75
  },
76
+ "execution_count": 6,
77
  "metadata": {},
78
  "output_type": "execute_result"
79
  }
 
86
  },
87
  {
88
  "cell_type": "code",
89
+ "execution_count": 7,
90
  "metadata": {},
91
  "outputs": [
92
  {
 
94
  "output_type": "stream",
95
  "text": [
96
  "Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
97
+ "Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20170618.pdf with status code 404\n",
98
  "Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
99
+ "Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20250101.pdf with status code 404\n"
100
  ]
101
  }
102
  ],
 
107
  },
108
  {
109
  "cell_type": "code",
110
+ "execution_count": 8,
111
  "metadata": {},
112
  "outputs": [
113
  {
114
  "name": "stdout",
115
  "output_type": "stream",
116
  "text": [
117
+ "3345\n"
118
  ]
119
  },
120
  {
121
  "data": {
122
  "text/plain": [
123
+ "3341"
124
  ]
125
  },
126
+ "execution_count": 8,
127
  "metadata": {},
128
  "output_type": "execute_result"
129
  }
 
136
  },
137
  {
138
  "cell_type": "code",
139
+ "execution_count": 9,
140
  "metadata": {},
141
  "outputs": [
142
  {
143
  "data": {
144
  "application/vnd.jupyter.widget-view+json": {
145
+ "model_id": "438a2a0c07fb4367b18a4deff93364e9",
146
  "version_major": 2,
147
  "version_minor": 0
148
  },
149
  "text/plain": [
150
+ " 0%| | 0/3345 [00:00<?, ?it/s]"
151
  ]
152
  },
153
  "metadata": {},
154
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  }
156
  ],
157
  "source": [
 
299
  " raise ValueError(\"Table pattern not recognized\")\n",
300
  "\n",
301
  "def process_file(date):\n",
302
+ " folders = [\"AQI_data\", \"AQI_data_csv\"]\n",
303
+ "\n",
304
+ " for folder in folders:\n",
305
+ " if not os.path.exists(folder):\n",
306
+ " os.makedirs(folder)\n",
307
  " file_exists, file_path, file_name = check_exists(date)\n",
308
  " if not file_exists:\n",
309
  " print(f\"File {file_name} does not exist\")\n",
 
359
  "_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
360
  ]
361
  },
362
+ {
363
+ "cell_type": "markdown",
364
+ "metadata": {},
365
+ "source": [
366
+ "## Creating Merged DataFrame"
367
+ ]
368
+ },
369
+ {
370
+ "cell_type": "code",
371
+ "execution_count": 10,
372
+ "metadata": {},
373
+ "outputs": [
374
+ {
375
+ "name": "stdout",
376
+ "output_type": "stream",
377
+ "text": [
378
+ "Merged CSV saved as AQI_data_csv/merged.csv\n"
379
+ ]
380
+ }
381
+ ],
382
+ "source": [
383
+ "import os\n",
384
+ "import pandas as pd\n",
385
+ "\n",
386
+ "def merge_csv_files(folder_path, output_file):\n",
387
+ " csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]\n",
388
+ " \n",
389
+ " if not csv_files:\n",
390
+ " print(\"No CSV files found in the folder.\")\n",
391
+ " return\n",
392
+ "\n",
393
+ " df_list = [pd.read_csv(os.path.join(folder_path, file)) for file in csv_files]\n",
394
+ " merged_df = pd.concat(df_list, ignore_index=True)\n",
395
+ "\n",
396
+ " merged_df.to_csv(output_file, index=False)\n",
397
+ " print(f\"Merged CSV saved as {output_file}\")\n",
398
+ "\n",
399
+ "# Example usage\n",
400
+ "folder_path = \"AQI_data_csv\"\n",
401
+ "output_file = \"AQI_data_csv/merged.csv\"\n",
402
+ "merge_csv_files(folder_path, output_file)"
403
+ ]
404
+ },
405
  {
406
  "cell_type": "markdown",
407
  "metadata": {},
 
411
  },
412
  {
413
  "cell_type": "code",
414
+ "execution_count": 11,
415
  "metadata": {},
416
  "outputs": [
417
  {
418
  "data": {
419
  "text/plain": [
420
+ "397732"
421
  ]
422
  },
423
+ "execution_count": 11,
424
  "metadata": {},
425
  "output_type": "execute_result"
426
  }
 
432
  },
433
  {
434
  "cell_type": "code",
435
+ "execution_count": 12,
436
  "metadata": {},
437
  "outputs": [
438
  {
439
  "name": "stdout",
440
  "output_type": "stream",
441
  "text": [
442
+ "['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Sri Vijaya Puram', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
443
  ]
444
  }
445
  ],
 
458
  " \"Manglore\": \"Mangalore\",\n",
459
  " \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
460
  " \"Tumakuru\": \"Tumidih\",\n",
461
+ " \"Tirumala\": \"Tirupati\",\n",
462
  " \"Tiruppur\": \"Tirupur\",\n",
463
  " \"Yamuna Nagar\": \"Yamunanagar\",\n",
464
  " \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
 
477
  },
478
  {
479
  "cell_type": "code",
480
+ "execution_count": 13,
481
  "metadata": {},
482
  "outputs": [
483
  {
484
  "data": {
485
  "text/plain": [
486
  "State\n",
487
+ "Andaman and Nicobar 6\n",
488
+ "Andhra Pradesh 11546\n",
489
+ "Arunachal Pradesh 614\n",
490
+ "Assam 5099\n",
491
+ "Bihar 28633\n",
492
+ "Chandigarh 1980\n",
493
+ "Chhattisgarh 5357\n",
494
+ "Delhi 3330\n",
495
+ "Gujarat 12195\n",
496
+ "Haryana 50177\n",
497
+ "Himachal Pradesh 1022\n",
498
+ "Jammu and Kashmir 822\n",
499
+ "Jharkhand 2076\n",
500
+ "Karnataka 35248\n",
501
+ "Kerala 11549\n",
502
+ "Madhya Pradesh 31326\n",
503
+ "Maharashtra 39193\n",
504
+ "Manipur 790\n",
505
+ "Meghalaya 1956\n",
506
+ "Mizoram 1535\n",
507
+ "Nagaland 1398\n",
508
+ "Odisha 12363\n",
509
+ "Puducherry 1433\n",
510
+ "Punjab 19676\n",
511
+ "Rajasthan 37729\n",
512
+ "Sikkim 812\n",
513
+ "Tamil Nadu 14080\n",
514
+ "Telangana 3322\n",
515
+ "Tripura 1442\n",
516
+ "Uttar Pradesh 41800\n",
517
+ "Uttarakhand 2156\n",
518
+ "West Bengal 15406\n",
519
  "Name: count, dtype: int64"
520
  ]
521
  },
522
+ "execution_count": 13,
523
  "metadata": {},
524
  "output_type": "execute_result"
525
  }
 
633
  " 'Solapur': 'Maharashtra',\n",
634
  " 'Sonipat': 'Haryana',\n",
635
  " 'Sri Ganganagar': 'Rajasthan',\n",
636
+ " 'Sri Vijaya Puram': 'Andaman and Nicobar',\n",
637
  " 'Srinagar': 'Jammu and Kashmir',\n",
638
  " 'Suakati': 'Odisha',\n",
639
  " 'Surat': 'Gujarat',\n",
 
672
  },
673
  {
674
  "cell_type": "code",
675
+ "execution_count": 14,
676
  "metadata": {},
677
  "outputs": [
678
  {
 
735
  "0 Agra 417 PM\\n2.5 Severe \n",
736
  "1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
737
  "\n",
738
+ " Based on number of monitoring stations Date State \n",
739
+ "0 1 2016-01-01 Uttar Pradesh \n",
740
+ "1 5 2016-01-01 Karnataka "
741
  ]
742
  },
743
+ "execution_count": 14,
744
  "metadata": {},
745
  "output_type": "execute_result"
746
  }
 
753
  },
754
  {
755
  "cell_type": "code",
756
+ "execution_count": 15,
757
  "metadata": {},
758
  "outputs": [
759
  {
760
  "data": {
761
  "text/plain": [
762
+ "396071"
763
  ]
764
  },
765
+ "execution_count": 15,
766
  "metadata": {},
767
  "output_type": "execute_result"
768
  }
 
773
  },
774
  {
775
  "cell_type": "code",
776
+ "execution_count": 16,
777
  "metadata": {},
778
  "outputs": [
779
  {
780
  "name": "stdout",
781
  "output_type": "stream",
782
  "text": [
783
+ "396071\n",
784
+ "395213\n"
785
  ]
786
  }
787
  ],
 
794
  },
795
  {
796
  "cell_type": "code",
797
+ "execution_count": 17,
798
  "metadata": {},
799
  "outputs": [],
800
  "source": [
 
804
  ],
805
  "metadata": {
806
  "kernelspec": {
807
+ "display_name": "Python 3",
808
  "language": "python",
809
  "name": "python3"
810
  },
 
818
  "name": "python",
819
  "nbconvert_exporter": "python",
820
  "pygments_lexer": "ipython3",
821
+ "version": "3.12.4"
822
  }
823
  },
824
  "nbformat": 4,