Fixed AQI Downloader Python Notebook
#3
by
kalpshah18
- opened
- aqi_downloader.ipynb +125 -84
aqi_downloader.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -20,7 +20,7 @@
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
-
"execution_count":
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
@@ -34,7 +34,9 @@
|
|
34 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
35 |
" if file_exists:\n",
|
36 |
" return file_path\n",
|
37 |
-
"
|
|
|
|
|
38 |
" url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
|
39 |
" response = requests.get(url)\n",
|
40 |
" if response.status_code == 200:\n",
|
@@ -48,7 +50,7 @@
|
|
48 |
},
|
49 |
{
|
50 |
"cell_type": "code",
|
51 |
-
"execution_count":
|
52 |
"metadata": {},
|
53 |
"outputs": [
|
54 |
{
|
@@ -59,19 +61,19 @@
|
|
59 |
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
|
60 |
" '2016-01-09', '2016-01-10',\n",
|
61 |
" ...\n",
|
62 |
-
" '
|
63 |
-
" '
|
64 |
-
" '
|
65 |
-
" dtype='datetime64[ns]', length=
|
66 |
]
|
67 |
},
|
68 |
{
|
69 |
"data": {
|
70 |
"text/plain": [
|
71 |
-
"(None,
|
72 |
]
|
73 |
},
|
74 |
-
"execution_count":
|
75 |
"metadata": {},
|
76 |
"output_type": "execute_result"
|
77 |
}
|
@@ -84,7 +86,7 @@
|
|
84 |
},
|
85 |
{
|
86 |
"cell_type": "code",
|
87 |
-
"execution_count":
|
88 |
"metadata": {},
|
89 |
"outputs": [
|
90 |
{
|
@@ -92,8 +94,9 @@
|
|
92 |
"output_type": "stream",
|
93 |
"text": [
|
94 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
|
|
|
95 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
|
96 |
-
"Failed to download https://cpcb.nic.in//upload/Downloads/
|
97 |
]
|
98 |
}
|
99 |
],
|
@@ -104,23 +107,23 @@
|
|
104 |
},
|
105 |
{
|
106 |
"cell_type": "code",
|
107 |
-
"execution_count":
|
108 |
"metadata": {},
|
109 |
"outputs": [
|
110 |
{
|
111 |
"name": "stdout",
|
112 |
"output_type": "stream",
|
113 |
"text": [
|
114 |
-
"
|
115 |
]
|
116 |
},
|
117 |
{
|
118 |
"data": {
|
119 |
"text/plain": [
|
120 |
-
"
|
121 |
]
|
122 |
},
|
123 |
-
"execution_count":
|
124 |
"metadata": {},
|
125 |
"output_type": "execute_result"
|
126 |
}
|
@@ -133,35 +136,22 @@
|
|
133 |
},
|
134 |
{
|
135 |
"cell_type": "code",
|
136 |
-
"execution_count":
|
137 |
"metadata": {},
|
138 |
"outputs": [
|
139 |
{
|
140 |
"data": {
|
141 |
"application/vnd.jupyter.widget-view+json": {
|
142 |
-
"model_id": "
|
143 |
"version_major": 2,
|
144 |
"version_minor": 0
|
145 |
},
|
146 |
"text/plain": [
|
147 |
-
" 0%| | 0/
|
148 |
]
|
149 |
},
|
150 |
"metadata": {},
|
151 |
"output_type": "display_data"
|
152 |
-
},
|
153 |
-
{
|
154 |
-
"name": "stdout",
|
155 |
-
"output_type": "stream",
|
156 |
-
"text": [
|
157 |
-
"File AQI_Bulletin_20160606.pdf does not exist\n",
|
158 |
-
"No tables found in AQI_data/AQI_Bulletin_20160704.pdf\n",
|
159 |
-
"No tables found in AQI_data/AQI_Bulletin_20160721.pdf\n",
|
160 |
-
"No tables found in AQI_data/AQI_Bulletin_20160723.pdf\n",
|
161 |
-
"No tables found in AQI_data/AQI_Bulletin_20160722.pdf\n",
|
162 |
-
"File AQI_Bulletin_20170618.pdf does not exist\n",
|
163 |
-
"File AQI_Bulletin_20171014.pdf does not exist\n"
|
164 |
-
]
|
165 |
}
|
166 |
],
|
167 |
"source": [
|
@@ -309,6 +299,11 @@
|
|
309 |
" raise ValueError(\"Table pattern not recognized\")\n",
|
310 |
"\n",
|
311 |
"def process_file(date):\n",
|
|
|
|
|
|
|
|
|
|
|
312 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
313 |
" if not file_exists:\n",
|
314 |
" print(f\"File {file_name} does not exist\")\n",
|
@@ -364,6 +359,49 @@
|
|
364 |
"_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
|
365 |
]
|
366 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
{
|
368 |
"cell_type": "markdown",
|
369 |
"metadata": {},
|
@@ -373,16 +411,16 @@
|
|
373 |
},
|
374 |
{
|
375 |
"cell_type": "code",
|
376 |
-
"execution_count":
|
377 |
"metadata": {},
|
378 |
"outputs": [
|
379 |
{
|
380 |
"data": {
|
381 |
"text/plain": [
|
382 |
-
"
|
383 |
]
|
384 |
},
|
385 |
-
"execution_count":
|
386 |
"metadata": {},
|
387 |
"output_type": "execute_result"
|
388 |
}
|
@@ -394,14 +432,14 @@
|
|
394 |
},
|
395 |
{
|
396 |
"cell_type": "code",
|
397 |
-
"execution_count":
|
398 |
"metadata": {},
|
399 |
"outputs": [
|
400 |
{
|
401 |
"name": "stdout",
|
402 |
"output_type": "stream",
|
403 |
"text": [
|
404 |
-
"['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
|
405 |
]
|
406 |
}
|
407 |
],
|
@@ -420,6 +458,7 @@
|
|
420 |
" \"Manglore\": \"Mangalore\",\n",
|
421 |
" \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
|
422 |
" \"Tumakuru\": \"Tumidih\",\n",
|
|
|
423 |
" \"Tiruppur\": \"Tirupur\",\n",
|
424 |
" \"Yamuna Nagar\": \"Yamunanagar\",\n",
|
425 |
" \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
|
@@ -438,48 +477,49 @@
|
|
438 |
},
|
439 |
{
|
440 |
"cell_type": "code",
|
441 |
-
"execution_count":
|
442 |
"metadata": {},
|
443 |
"outputs": [
|
444 |
{
|
445 |
"data": {
|
446 |
"text/plain": [
|
447 |
"State\n",
|
448 |
-
"
|
449 |
-
"
|
450 |
-
"
|
451 |
-
"
|
452 |
-
"
|
453 |
-
"
|
454 |
-
"
|
455 |
-
"
|
456 |
-
"
|
457 |
-
"
|
458 |
-
"
|
459 |
-
"
|
460 |
-
"
|
461 |
-
"
|
462 |
-
"
|
463 |
-
"
|
464 |
-
"
|
465 |
-
"
|
466 |
-
"
|
467 |
-
"
|
468 |
-
"
|
469 |
-
"
|
470 |
-
"
|
471 |
-
"
|
472 |
-
"
|
473 |
-
"
|
474 |
-
"
|
475 |
-
"
|
476 |
-
"
|
477 |
-
"
|
478 |
-
"
|
|
|
479 |
"Name: count, dtype: int64"
|
480 |
]
|
481 |
},
|
482 |
-
"execution_count":
|
483 |
"metadata": {},
|
484 |
"output_type": "execute_result"
|
485 |
}
|
@@ -593,6 +633,7 @@
|
|
593 |
" 'Solapur': 'Maharashtra',\n",
|
594 |
" 'Sonipat': 'Haryana',\n",
|
595 |
" 'Sri Ganganagar': 'Rajasthan',\n",
|
|
|
596 |
" 'Srinagar': 'Jammu and Kashmir',\n",
|
597 |
" 'Suakati': 'Odisha',\n",
|
598 |
" 'Surat': 'Gujarat',\n",
|
@@ -631,7 +672,7 @@
|
|
631 |
},
|
632 |
{
|
633 |
"cell_type": "code",
|
634 |
-
"execution_count":
|
635 |
"metadata": {},
|
636 |
"outputs": [
|
637 |
{
|
@@ -694,12 +735,12 @@
|
|
694 |
"0 Agra 417 PM\\n2.5 Severe \n",
|
695 |
"1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
|
696 |
"\n",
|
697 |
-
"
|
698 |
-
"0
|
699 |
-
"1
|
700 |
]
|
701 |
},
|
702 |
-
"execution_count":
|
703 |
"metadata": {},
|
704 |
"output_type": "execute_result"
|
705 |
}
|
@@ -712,16 +753,16 @@
|
|
712 |
},
|
713 |
{
|
714 |
"cell_type": "code",
|
715 |
-
"execution_count":
|
716 |
"metadata": {},
|
717 |
"outputs": [
|
718 |
{
|
719 |
"data": {
|
720 |
"text/plain": [
|
721 |
-
"
|
722 |
]
|
723 |
},
|
724 |
-
"execution_count":
|
725 |
"metadata": {},
|
726 |
"output_type": "execute_result"
|
727 |
}
|
@@ -732,15 +773,15 @@
|
|
732 |
},
|
733 |
{
|
734 |
"cell_type": "code",
|
735 |
-
"execution_count":
|
736 |
"metadata": {},
|
737 |
"outputs": [
|
738 |
{
|
739 |
"name": "stdout",
|
740 |
"output_type": "stream",
|
741 |
"text": [
|
742 |
-
"
|
743 |
-
"
|
744 |
]
|
745 |
}
|
746 |
],
|
@@ -753,7 +794,7 @@
|
|
753 |
},
|
754 |
{
|
755 |
"cell_type": "code",
|
756 |
-
"execution_count":
|
757 |
"metadata": {},
|
758 |
"outputs": [],
|
759 |
"source": [
|
@@ -763,7 +804,7 @@
|
|
763 |
],
|
764 |
"metadata": {
|
765 |
"kernelspec": {
|
766 |
-
"display_name": "
|
767 |
"language": "python",
|
768 |
"name": "python3"
|
769 |
},
|
@@ -777,7 +818,7 @@
|
|
777 |
"name": "python",
|
778 |
"nbconvert_exporter": "python",
|
779 |
"pygments_lexer": "ipython3",
|
780 |
-
"version": "3.
|
781 |
}
|
782 |
},
|
783 |
"nbformat": 4,
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
+
"execution_count": 5,
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
|
|
34 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
35 |
" if file_exists:\n",
|
36 |
" return file_path\n",
|
37 |
+
"\n",
|
38 |
+
" os.makedirs(\"AQI_data\", exist_ok=True)\n",
|
39 |
+
"\n",
|
40 |
" url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
|
41 |
" response = requests.get(url)\n",
|
42 |
" if response.status_code == 200:\n",
|
|
|
50 |
},
|
51 |
{
|
52 |
"cell_type": "code",
|
53 |
+
"execution_count": 6,
|
54 |
"metadata": {},
|
55 |
"outputs": [
|
56 |
{
|
|
|
61 |
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
|
62 |
" '2016-01-09', '2016-01-10',\n",
|
63 |
" ...\n",
|
64 |
+
" '2025-02-17', '2025-02-18', '2025-02-19', '2025-02-20',\n",
|
65 |
+
" '2025-02-21', '2025-02-22', '2025-02-23', '2025-02-24',\n",
|
66 |
+
" '2025-02-25', '2025-02-26'],\n",
|
67 |
+
" dtype='datetime64[ns]', length=3345, freq='D')\n"
|
68 |
]
|
69 |
},
|
70 |
{
|
71 |
"data": {
|
72 |
"text/plain": [
|
73 |
+
"(None, 3345)"
|
74 |
]
|
75 |
},
|
76 |
+
"execution_count": 6,
|
77 |
"metadata": {},
|
78 |
"output_type": "execute_result"
|
79 |
}
|
|
|
86 |
},
|
87 |
{
|
88 |
"cell_type": "code",
|
89 |
+
"execution_count": 7,
|
90 |
"metadata": {},
|
91 |
"outputs": [
|
92 |
{
|
|
|
94 |
"output_type": "stream",
|
95 |
"text": [
|
96 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
|
97 |
+
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20170618.pdf with status code 404\n",
|
98 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
|
99 |
+
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20250101.pdf with status code 404\n"
|
100 |
]
|
101 |
}
|
102 |
],
|
|
|
107 |
},
|
108 |
{
|
109 |
"cell_type": "code",
|
110 |
+
"execution_count": 8,
|
111 |
"metadata": {},
|
112 |
"outputs": [
|
113 |
{
|
114 |
"name": "stdout",
|
115 |
"output_type": "stream",
|
116 |
"text": [
|
117 |
+
"3345\n"
|
118 |
]
|
119 |
},
|
120 |
{
|
121 |
"data": {
|
122 |
"text/plain": [
|
123 |
+
"3341"
|
124 |
]
|
125 |
},
|
126 |
+
"execution_count": 8,
|
127 |
"metadata": {},
|
128 |
"output_type": "execute_result"
|
129 |
}
|
|
|
136 |
},
|
137 |
{
|
138 |
"cell_type": "code",
|
139 |
+
"execution_count": 9,
|
140 |
"metadata": {},
|
141 |
"outputs": [
|
142 |
{
|
143 |
"data": {
|
144 |
"application/vnd.jupyter.widget-view+json": {
|
145 |
+
"model_id": "438a2a0c07fb4367b18a4deff93364e9",
|
146 |
"version_major": 2,
|
147 |
"version_minor": 0
|
148 |
},
|
149 |
"text/plain": [
|
150 |
+
" 0%| | 0/3345 [00:00<?, ?it/s]"
|
151 |
]
|
152 |
},
|
153 |
"metadata": {},
|
154 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
}
|
156 |
],
|
157 |
"source": [
|
|
|
299 |
" raise ValueError(\"Table pattern not recognized\")\n",
|
300 |
"\n",
|
301 |
"def process_file(date):\n",
|
302 |
+
" folders = [\"AQI_data\", \"AQI_data_csv\"]\n",
|
303 |
+
"\n",
|
304 |
+
" for folder in folders:\n",
|
305 |
+
" if not os.path.exists(folder):\n",
|
306 |
+
" os.makedirs(folder)\n",
|
307 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
308 |
" if not file_exists:\n",
|
309 |
" print(f\"File {file_name} does not exist\")\n",
|
|
|
359 |
"_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
|
360 |
]
|
361 |
},
|
362 |
+
{
|
363 |
+
"cell_type": "markdown",
|
364 |
+
"metadata": {},
|
365 |
+
"source": [
|
366 |
+
"## Creating Merged DataFrame"
|
367 |
+
]
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"cell_type": "code",
|
371 |
+
"execution_count": 10,
|
372 |
+
"metadata": {},
|
373 |
+
"outputs": [
|
374 |
+
{
|
375 |
+
"name": "stdout",
|
376 |
+
"output_type": "stream",
|
377 |
+
"text": [
|
378 |
+
"Merged CSV saved as AQI_data_csv/merged.csv\n"
|
379 |
+
]
|
380 |
+
}
|
381 |
+
],
|
382 |
+
"source": [
|
383 |
+
"import os\n",
|
384 |
+
"import pandas as pd\n",
|
385 |
+
"\n",
|
386 |
+
"def merge_csv_files(folder_path, output_file):\n",
|
387 |
+
" csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]\n",
|
388 |
+
" \n",
|
389 |
+
" if not csv_files:\n",
|
390 |
+
" print(\"No CSV files found in the folder.\")\n",
|
391 |
+
" return\n",
|
392 |
+
"\n",
|
393 |
+
" df_list = [pd.read_csv(os.path.join(folder_path, file)) for file in csv_files]\n",
|
394 |
+
" merged_df = pd.concat(df_list, ignore_index=True)\n",
|
395 |
+
"\n",
|
396 |
+
" merged_df.to_csv(output_file, index=False)\n",
|
397 |
+
" print(f\"Merged CSV saved as {output_file}\")\n",
|
398 |
+
"\n",
|
399 |
+
"# Example usage\n",
|
400 |
+
"folder_path = \"AQI_data_csv\"\n",
|
401 |
+
"output_file = \"AQI_data_csv/merged.csv\"\n",
|
402 |
+
"merge_csv_files(folder_path, output_file)"
|
403 |
+
]
|
404 |
+
},
|
405 |
{
|
406 |
"cell_type": "markdown",
|
407 |
"metadata": {},
|
|
|
411 |
},
|
412 |
{
|
413 |
"cell_type": "code",
|
414 |
+
"execution_count": 11,
|
415 |
"metadata": {},
|
416 |
"outputs": [
|
417 |
{
|
418 |
"data": {
|
419 |
"text/plain": [
|
420 |
+
"397732"
|
421 |
]
|
422 |
},
|
423 |
+
"execution_count": 11,
|
424 |
"metadata": {},
|
425 |
"output_type": "execute_result"
|
426 |
}
|
|
|
432 |
},
|
433 |
{
|
434 |
"cell_type": "code",
|
435 |
+
"execution_count": 12,
|
436 |
"metadata": {},
|
437 |
"outputs": [
|
438 |
{
|
439 |
"name": "stdout",
|
440 |
"output_type": "stream",
|
441 |
"text": [
|
442 |
+
"['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Sri Vijaya Puram', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
|
443 |
]
|
444 |
}
|
445 |
],
|
|
|
458 |
" \"Manglore\": \"Mangalore\",\n",
|
459 |
" \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
|
460 |
" \"Tumakuru\": \"Tumidih\",\n",
|
461 |
+
" \"Tirumala\": \"Tirupati\",\n",
|
462 |
" \"Tiruppur\": \"Tirupur\",\n",
|
463 |
" \"Yamuna Nagar\": \"Yamunanagar\",\n",
|
464 |
" \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
|
|
|
477 |
},
|
478 |
{
|
479 |
"cell_type": "code",
|
480 |
+
"execution_count": 13,
|
481 |
"metadata": {},
|
482 |
"outputs": [
|
483 |
{
|
484 |
"data": {
|
485 |
"text/plain": [
|
486 |
"State\n",
|
487 |
+
"Andaman and Nicobar 6\n",
|
488 |
+
"Andhra Pradesh 11546\n",
|
489 |
+
"Arunachal Pradesh 614\n",
|
490 |
+
"Assam 5099\n",
|
491 |
+
"Bihar 28633\n",
|
492 |
+
"Chandigarh 1980\n",
|
493 |
+
"Chhattisgarh 5357\n",
|
494 |
+
"Delhi 3330\n",
|
495 |
+
"Gujarat 12195\n",
|
496 |
+
"Haryana 50177\n",
|
497 |
+
"Himachal Pradesh 1022\n",
|
498 |
+
"Jammu and Kashmir 822\n",
|
499 |
+
"Jharkhand 2076\n",
|
500 |
+
"Karnataka 35248\n",
|
501 |
+
"Kerala 11549\n",
|
502 |
+
"Madhya Pradesh 31326\n",
|
503 |
+
"Maharashtra 39193\n",
|
504 |
+
"Manipur 790\n",
|
505 |
+
"Meghalaya 1956\n",
|
506 |
+
"Mizoram 1535\n",
|
507 |
+
"Nagaland 1398\n",
|
508 |
+
"Odisha 12363\n",
|
509 |
+
"Puducherry 1433\n",
|
510 |
+
"Punjab 19676\n",
|
511 |
+
"Rajasthan 37729\n",
|
512 |
+
"Sikkim 812\n",
|
513 |
+
"Tamil Nadu 14080\n",
|
514 |
+
"Telangana 3322\n",
|
515 |
+
"Tripura 1442\n",
|
516 |
+
"Uttar Pradesh 41800\n",
|
517 |
+
"Uttarakhand 2156\n",
|
518 |
+
"West Bengal 15406\n",
|
519 |
"Name: count, dtype: int64"
|
520 |
]
|
521 |
},
|
522 |
+
"execution_count": 13,
|
523 |
"metadata": {},
|
524 |
"output_type": "execute_result"
|
525 |
}
|
|
|
633 |
" 'Solapur': 'Maharashtra',\n",
|
634 |
" 'Sonipat': 'Haryana',\n",
|
635 |
" 'Sri Ganganagar': 'Rajasthan',\n",
|
636 |
+
" 'Sri Vijaya Puram': 'Andaman and Nicobar',\n",
|
637 |
" 'Srinagar': 'Jammu and Kashmir',\n",
|
638 |
" 'Suakati': 'Odisha',\n",
|
639 |
" 'Surat': 'Gujarat',\n",
|
|
|
672 |
},
|
673 |
{
|
674 |
"cell_type": "code",
|
675 |
+
"execution_count": 14,
|
676 |
"metadata": {},
|
677 |
"outputs": [
|
678 |
{
|
|
|
735 |
"0 Agra 417 PM\\n2.5 Severe \n",
|
736 |
"1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
|
737 |
"\n",
|
738 |
+
" Based on number of monitoring stations Date State \n",
|
739 |
+
"0 1 2016-01-01 Uttar Pradesh \n",
|
740 |
+
"1 5 2016-01-01 Karnataka "
|
741 |
]
|
742 |
},
|
743 |
+
"execution_count": 14,
|
744 |
"metadata": {},
|
745 |
"output_type": "execute_result"
|
746 |
}
|
|
|
753 |
},
|
754 |
{
|
755 |
"cell_type": "code",
|
756 |
+
"execution_count": 15,
|
757 |
"metadata": {},
|
758 |
"outputs": [
|
759 |
{
|
760 |
"data": {
|
761 |
"text/plain": [
|
762 |
+
"396071"
|
763 |
]
|
764 |
},
|
765 |
+
"execution_count": 15,
|
766 |
"metadata": {},
|
767 |
"output_type": "execute_result"
|
768 |
}
|
|
|
773 |
},
|
774 |
{
|
775 |
"cell_type": "code",
|
776 |
+
"execution_count": 16,
|
777 |
"metadata": {},
|
778 |
"outputs": [
|
779 |
{
|
780 |
"name": "stdout",
|
781 |
"output_type": "stream",
|
782 |
"text": [
|
783 |
+
"396071\n",
|
784 |
+
"395213\n"
|
785 |
]
|
786 |
}
|
787 |
],
|
|
|
794 |
},
|
795 |
{
|
796 |
"cell_type": "code",
|
797 |
+
"execution_count": 17,
|
798 |
"metadata": {},
|
799 |
"outputs": [],
|
800 |
"source": [
|
|
|
804 |
],
|
805 |
"metadata": {
|
806 |
"kernelspec": {
|
807 |
+
"display_name": "Python 3",
|
808 |
"language": "python",
|
809 |
"name": "python3"
|
810 |
},
|
|
|
818 |
"name": "python",
|
819 |
"nbconvert_exporter": "python",
|
820 |
"pygments_lexer": "ipython3",
|
821 |
+
"version": "3.12.4"
|
822 |
}
|
823 |
},
|
824 |
"nbformat": 4,
|