Fixed AQI Downloader Python Notebook
Browse files
Exceptions were being thrown when the AQI_data and AQI_data_csv folders did not already exist, so code has been added to create these folders if they are missing.
Created CSV Merging Code to create merged.csv in AQI_data_csv Folder.
Added Tirumala (Tirupati) to City Mapping
Added Sri Vijaya Puram (Previously Port Blair) to City to State Mapping (Andaman And Nicobar)
The notebook also uses data updated as of 27-02-2025 23:15.
Note: xarray still needs to be added to requirements.txt.
xarray is only an optional dependency of pandas, so it is not installed automatically, but it is used in the last cell of the notebook.
- aqi_downloader.ipynb +125 -84
aqi_downloader.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
@@ -20,7 +20,7 @@
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
-
"execution_count":
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
@@ -34,7 +34,9 @@
|
|
34 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
35 |
" if file_exists:\n",
|
36 |
" return file_path\n",
|
37 |
-
"
|
|
|
|
|
38 |
" url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
|
39 |
" response = requests.get(url)\n",
|
40 |
" if response.status_code == 200:\n",
|
@@ -48,7 +50,7 @@
|
|
48 |
},
|
49 |
{
|
50 |
"cell_type": "code",
|
51 |
-
"execution_count":
|
52 |
"metadata": {},
|
53 |
"outputs": [
|
54 |
{
|
@@ -59,19 +61,19 @@
|
|
59 |
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
|
60 |
" '2016-01-09', '2016-01-10',\n",
|
61 |
" ...\n",
|
62 |
-
" '
|
63 |
-
" '
|
64 |
-
" '
|
65 |
-
" dtype='datetime64[ns]', length=
|
66 |
]
|
67 |
},
|
68 |
{
|
69 |
"data": {
|
70 |
"text/plain": [
|
71 |
-
"(None,
|
72 |
]
|
73 |
},
|
74 |
-
"execution_count":
|
75 |
"metadata": {},
|
76 |
"output_type": "execute_result"
|
77 |
}
|
@@ -84,7 +86,7 @@
|
|
84 |
},
|
85 |
{
|
86 |
"cell_type": "code",
|
87 |
-
"execution_count":
|
88 |
"metadata": {},
|
89 |
"outputs": [
|
90 |
{
|
@@ -92,8 +94,9 @@
|
|
92 |
"output_type": "stream",
|
93 |
"text": [
|
94 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
|
|
|
95 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
|
96 |
-
"Failed to download https://cpcb.nic.in//upload/Downloads/
|
97 |
]
|
98 |
}
|
99 |
],
|
@@ -104,23 +107,23 @@
|
|
104 |
},
|
105 |
{
|
106 |
"cell_type": "code",
|
107 |
-
"execution_count":
|
108 |
"metadata": {},
|
109 |
"outputs": [
|
110 |
{
|
111 |
"name": "stdout",
|
112 |
"output_type": "stream",
|
113 |
"text": [
|
114 |
-
"
|
115 |
]
|
116 |
},
|
117 |
{
|
118 |
"data": {
|
119 |
"text/plain": [
|
120 |
-
"
|
121 |
]
|
122 |
},
|
123 |
-
"execution_count":
|
124 |
"metadata": {},
|
125 |
"output_type": "execute_result"
|
126 |
}
|
@@ -133,35 +136,22 @@
|
|
133 |
},
|
134 |
{
|
135 |
"cell_type": "code",
|
136 |
-
"execution_count":
|
137 |
"metadata": {},
|
138 |
"outputs": [
|
139 |
{
|
140 |
"data": {
|
141 |
"application/vnd.jupyter.widget-view+json": {
|
142 |
-
"model_id": "
|
143 |
"version_major": 2,
|
144 |
"version_minor": 0
|
145 |
},
|
146 |
"text/plain": [
|
147 |
-
" 0%| | 0/
|
148 |
]
|
149 |
},
|
150 |
"metadata": {},
|
151 |
"output_type": "display_data"
|
152 |
-
},
|
153 |
-
{
|
154 |
-
"name": "stdout",
|
155 |
-
"output_type": "stream",
|
156 |
-
"text": [
|
157 |
-
"File AQI_Bulletin_20160606.pdf does not exist\n",
|
158 |
-
"No tables found in AQI_data/AQI_Bulletin_20160704.pdf\n",
|
159 |
-
"No tables found in AQI_data/AQI_Bulletin_20160721.pdf\n",
|
160 |
-
"No tables found in AQI_data/AQI_Bulletin_20160723.pdf\n",
|
161 |
-
"No tables found in AQI_data/AQI_Bulletin_20160722.pdf\n",
|
162 |
-
"File AQI_Bulletin_20170618.pdf does not exist\n",
|
163 |
-
"File AQI_Bulletin_20171014.pdf does not exist\n"
|
164 |
-
]
|
165 |
}
|
166 |
],
|
167 |
"source": [
|
@@ -309,6 +299,11 @@
|
|
309 |
" raise ValueError(\"Table pattern not recognized\")\n",
|
310 |
"\n",
|
311 |
"def process_file(date):\n",
|
|
|
|
|
|
|
|
|
|
|
312 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
313 |
" if not file_exists:\n",
|
314 |
" print(f\"File {file_name} does not exist\")\n",
|
@@ -364,6 +359,49 @@
|
|
364 |
"_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
|
365 |
]
|
366 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
{
|
368 |
"cell_type": "markdown",
|
369 |
"metadata": {},
|
@@ -373,16 +411,16 @@
|
|
373 |
},
|
374 |
{
|
375 |
"cell_type": "code",
|
376 |
-
"execution_count":
|
377 |
"metadata": {},
|
378 |
"outputs": [
|
379 |
{
|
380 |
"data": {
|
381 |
"text/plain": [
|
382 |
-
"
|
383 |
]
|
384 |
},
|
385 |
-
"execution_count":
|
386 |
"metadata": {},
|
387 |
"output_type": "execute_result"
|
388 |
}
|
@@ -394,14 +432,14 @@
|
|
394 |
},
|
395 |
{
|
396 |
"cell_type": "code",
|
397 |
-
"execution_count":
|
398 |
"metadata": {},
|
399 |
"outputs": [
|
400 |
{
|
401 |
"name": "stdout",
|
402 |
"output_type": "stream",
|
403 |
"text": [
|
404 |
-
"['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 
'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
|
405 |
]
|
406 |
}
|
407 |
],
|
@@ -420,6 +458,7 @@
|
|
420 |
" \"Manglore\": \"Mangalore\",\n",
|
421 |
" \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
|
422 |
" \"Tumakuru\": \"Tumidih\",\n",
|
|
|
423 |
" \"Tiruppur\": \"Tirupur\",\n",
|
424 |
" \"Yamuna Nagar\": \"Yamunanagar\",\n",
|
425 |
" \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
|
@@ -438,48 +477,49 @@
|
|
438 |
},
|
439 |
{
|
440 |
"cell_type": "code",
|
441 |
-
"execution_count":
|
442 |
"metadata": {},
|
443 |
"outputs": [
|
444 |
{
|
445 |
"data": {
|
446 |
"text/plain": [
|
447 |
"State\n",
|
448 |
-
"
|
449 |
-
"
|
450 |
-
"
|
451 |
-
"
|
452 |
-
"
|
453 |
-
"
|
454 |
-
"
|
455 |
-
"
|
456 |
-
"
|
457 |
-
"
|
458 |
-
"
|
459 |
-
"
|
460 |
-
"
|
461 |
-
"
|
462 |
-
"
|
463 |
-
"
|
464 |
-
"
|
465 |
-
"
|
466 |
-
"
|
467 |
-
"
|
468 |
-
"
|
469 |
-
"
|
470 |
-
"
|
471 |
-
"
|
472 |
-
"
|
473 |
-
"
|
474 |
-
"
|
475 |
-
"
|
476 |
-
"
|
477 |
-
"
|
478 |
-
"
|
|
|
479 |
"Name: count, dtype: int64"
|
480 |
]
|
481 |
},
|
482 |
-
"execution_count":
|
483 |
"metadata": {},
|
484 |
"output_type": "execute_result"
|
485 |
}
|
@@ -593,6 +633,7 @@
|
|
593 |
" 'Solapur': 'Maharashtra',\n",
|
594 |
" 'Sonipat': 'Haryana',\n",
|
595 |
" 'Sri Ganganagar': 'Rajasthan',\n",
|
|
|
596 |
" 'Srinagar': 'Jammu and Kashmir',\n",
|
597 |
" 'Suakati': 'Odisha',\n",
|
598 |
" 'Surat': 'Gujarat',\n",
|
@@ -631,7 +672,7 @@
|
|
631 |
},
|
632 |
{
|
633 |
"cell_type": "code",
|
634 |
-
"execution_count":
|
635 |
"metadata": {},
|
636 |
"outputs": [
|
637 |
{
|
@@ -694,12 +735,12 @@
|
|
694 |
"0 Agra 417 PM\\n2.5 Severe \n",
|
695 |
"1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
|
696 |
"\n",
|
697 |
-
"
|
698 |
-
"0
|
699 |
-
"1
|
700 |
]
|
701 |
},
|
702 |
-
"execution_count":
|
703 |
"metadata": {},
|
704 |
"output_type": "execute_result"
|
705 |
}
|
@@ -712,16 +753,16 @@
|
|
712 |
},
|
713 |
{
|
714 |
"cell_type": "code",
|
715 |
-
"execution_count":
|
716 |
"metadata": {},
|
717 |
"outputs": [
|
718 |
{
|
719 |
"data": {
|
720 |
"text/plain": [
|
721 |
-
"
|
722 |
]
|
723 |
},
|
724 |
-
"execution_count":
|
725 |
"metadata": {},
|
726 |
"output_type": "execute_result"
|
727 |
}
|
@@ -732,15 +773,15 @@
|
|
732 |
},
|
733 |
{
|
734 |
"cell_type": "code",
|
735 |
-
"execution_count":
|
736 |
"metadata": {},
|
737 |
"outputs": [
|
738 |
{
|
739 |
"name": "stdout",
|
740 |
"output_type": "stream",
|
741 |
"text": [
|
742 |
-
"
|
743 |
-
"
|
744 |
]
|
745 |
}
|
746 |
],
|
@@ -753,7 +794,7 @@
|
|
753 |
},
|
754 |
{
|
755 |
"cell_type": "code",
|
756 |
-
"execution_count":
|
757 |
"metadata": {},
|
758 |
"outputs": [],
|
759 |
"source": [
|
@@ -763,7 +804,7 @@
|
|
763 |
],
|
764 |
"metadata": {
|
765 |
"kernelspec": {
|
766 |
-
"display_name": "
|
767 |
"language": "python",
|
768 |
"name": "python3"
|
769 |
},
|
@@ -777,7 +818,7 @@
|
|
777 |
"name": "python",
|
778 |
"nbconvert_exporter": "python",
|
779 |
"pygments_lexer": "ipython3",
|
780 |
-
"version": "3.
|
781 |
}
|
782 |
},
|
783 |
"nbformat": 4,
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
"metadata": {},
|
7 |
"outputs": [],
|
8 |
"source": [
|
|
|
20 |
},
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
+
"execution_count": 5,
|
24 |
"metadata": {},
|
25 |
"outputs": [],
|
26 |
"source": [
|
|
|
34 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
35 |
" if file_exists:\n",
|
36 |
" return file_path\n",
|
37 |
+
"\n",
|
38 |
+
" os.makedirs(\"AQI_data\", exist_ok=True)\n",
|
39 |
+
"\n",
|
40 |
" url = f\"https://cpcb.nic.in//upload/Downloads/{file_name}\"\n",
|
41 |
" response = requests.get(url)\n",
|
42 |
" if response.status_code == 200:\n",
|
|
|
50 |
},
|
51 |
{
|
52 |
"cell_type": "code",
|
53 |
+
"execution_count": 6,
|
54 |
"metadata": {},
|
55 |
"outputs": [
|
56 |
{
|
|
|
61 |
" '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',\n",
|
62 |
" '2016-01-09', '2016-01-10',\n",
|
63 |
" ...\n",
|
64 |
+
" '2025-02-17', '2025-02-18', '2025-02-19', '2025-02-20',\n",
|
65 |
+
" '2025-02-21', '2025-02-22', '2025-02-23', '2025-02-24',\n",
|
66 |
+
" '2025-02-25', '2025-02-26'],\n",
|
67 |
+
" dtype='datetime64[ns]', length=3345, freq='D')\n"
|
68 |
]
|
69 |
},
|
70 |
{
|
71 |
"data": {
|
72 |
"text/plain": [
|
73 |
+
"(None, 3345)"
|
74 |
]
|
75 |
},
|
76 |
+
"execution_count": 6,
|
77 |
"metadata": {},
|
78 |
"output_type": "execute_result"
|
79 |
}
|
|
|
86 |
},
|
87 |
{
|
88 |
"cell_type": "code",
|
89 |
+
"execution_count": 7,
|
90 |
"metadata": {},
|
91 |
"outputs": [
|
92 |
{
|
|
|
94 |
"output_type": "stream",
|
95 |
"text": [
|
96 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20160606.pdf with status code 404\n",
|
97 |
+
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20170618.pdf with status code 404\n",
|
98 |
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20171014.pdf with status code 404\n",
|
99 |
+
"Failed to download https://cpcb.nic.in//upload/Downloads/AQI_Bulletin_20250101.pdf with status code 404\n"
|
100 |
]
|
101 |
}
|
102 |
],
|
|
|
107 |
},
|
108 |
{
|
109 |
"cell_type": "code",
|
110 |
+
"execution_count": 8,
|
111 |
"metadata": {},
|
112 |
"outputs": [
|
113 |
{
|
114 |
"name": "stdout",
|
115 |
"output_type": "stream",
|
116 |
"text": [
|
117 |
+
"3345\n"
|
118 |
]
|
119 |
},
|
120 |
{
|
121 |
"data": {
|
122 |
"text/plain": [
|
123 |
+
"3341"
|
124 |
]
|
125 |
},
|
126 |
+
"execution_count": 8,
|
127 |
"metadata": {},
|
128 |
"output_type": "execute_result"
|
129 |
}
|
|
|
136 |
},
|
137 |
{
|
138 |
"cell_type": "code",
|
139 |
+
"execution_count": 9,
|
140 |
"metadata": {},
|
141 |
"outputs": [
|
142 |
{
|
143 |
"data": {
|
144 |
"application/vnd.jupyter.widget-view+json": {
|
145 |
+
"model_id": "438a2a0c07fb4367b18a4deff93364e9",
|
146 |
"version_major": 2,
|
147 |
"version_minor": 0
|
148 |
},
|
149 |
"text/plain": [
|
150 |
+
" 0%| | 0/3345 [00:00<?, ?it/s]"
|
151 |
]
|
152 |
},
|
153 |
"metadata": {},
|
154 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
}
|
156 |
],
|
157 |
"source": [
|
|
|
299 |
" raise ValueError(\"Table pattern not recognized\")\n",
|
300 |
"\n",
|
301 |
"def process_file(date):\n",
|
302 |
+
" folders = [\"AQI_data\", \"AQI_data_csv\"]\n",
|
303 |
+
"\n",
|
304 |
+
" for folder in folders:\n",
|
305 |
+
" if not os.path.exists(folder):\n",
|
306 |
+
" os.makedirs(folder)\n",
|
307 |
" file_exists, file_path, file_name = check_exists(date)\n",
|
308 |
" if not file_exists:\n",
|
309 |
" print(f\"File {file_name} does not exist\")\n",
|
|
|
359 |
"_ = Parallel(48)(delayed(process_file)(file_path) for file_path in tqdm(dates))"
|
360 |
]
|
361 |
},
|
362 |
+
{
|
363 |
+
"cell_type": "markdown",
|
364 |
+
"metadata": {},
|
365 |
+
"source": [
|
366 |
+
"## Creating Merged DataFrame"
|
367 |
+
]
|
368 |
+
},
|
369 |
+
{
|
370 |
+
"cell_type": "code",
|
371 |
+
"execution_count": 10,
|
372 |
+
"metadata": {},
|
373 |
+
"outputs": [
|
374 |
+
{
|
375 |
+
"name": "stdout",
|
376 |
+
"output_type": "stream",
|
377 |
+
"text": [
|
378 |
+
"Merged CSV saved as AQI_data_csv/merged.csv\n"
|
379 |
+
]
|
380 |
+
}
|
381 |
+
],
|
382 |
+
"source": [
|
383 |
+
"import os\n",
|
384 |
+
"import pandas as pd\n",
|
385 |
+
"\n",
|
386 |
+
"def merge_csv_files(folder_path, output_file):\n",
|
387 |
+
" csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]\n",
|
388 |
+
" \n",
|
389 |
+
" if not csv_files:\n",
|
390 |
+
" print(\"No CSV files found in the folder.\")\n",
|
391 |
+
" return\n",
|
392 |
+
"\n",
|
393 |
+
" df_list = [pd.read_csv(os.path.join(folder_path, file)) for file in csv_files]\n",
|
394 |
+
" merged_df = pd.concat(df_list, ignore_index=True)\n",
|
395 |
+
"\n",
|
396 |
+
" merged_df.to_csv(output_file, index=False)\n",
|
397 |
+
" print(f\"Merged CSV saved as {output_file}\")\n",
|
398 |
+
"\n",
|
399 |
+
"# Example usage\n",
|
400 |
+
"folder_path = \"AQI_data_csv\"\n",
|
401 |
+
"output_file = \"AQI_data_csv/merged.csv\"\n",
|
402 |
+
"merge_csv_files(folder_path, output_file)"
|
403 |
+
]
|
404 |
+
},
|
405 |
{
|
406 |
"cell_type": "markdown",
|
407 |
"metadata": {},
|
|
|
411 |
},
|
412 |
{
|
413 |
"cell_type": "code",
|
414 |
+
"execution_count": 11,
|
415 |
"metadata": {},
|
416 |
"outputs": [
|
417 |
{
|
418 |
"data": {
|
419 |
"text/plain": [
|
420 |
+
"397732"
|
421 |
]
|
422 |
},
|
423 |
+
"execution_count": 11,
|
424 |
"metadata": {},
|
425 |
"output_type": "execute_result"
|
426 |
}
|
|
|
432 |
},
|
433 |
{
|
434 |
"cell_type": "code",
|
435 |
+
"execution_count": 12,
|
436 |
"metadata": {},
|
437 |
"outputs": [
|
438 |
{
|
439 |
"name": "stdout",
|
440 |
"output_type": "stream",
|
441 |
"text": [
|
442 |
+
"['Agartala', 'Agra', 'Ahmedabad', 'Ahmednagar', 'Aizawl', 'Ajmer', 'Akola', 'Alwar', 'Ambala', 'Amravati', 'Amritsar', 'Anantapur', 'Angul', 'Ankleshwar', 'Araria', 'Ariyalur', 'Arrah', 'Asansol', 'Aurangabad (Bihar)', 'Aurangabad(Maharashtra)', 'Baddi', 'Badlapur', 'Bagalkot', 'Baghpat', 'Bahadurgarh', 'Balasore', 'Ballabgarh', 'Banswara', 'Baran', 'Barbil', 'Bareilly', 'Baripada', 'Barmer', 'Barrackpore', 'Bathinda', 'Begusarai', 'Belapur', 'Belgaum', 'Bengaluru', 'Bettiah', 'Bhagalpur', 'Bharatpur', 'Bhilai', 'Bhilwara', 'Bhiwadi', 'Bhiwandi', 'Bhiwani', 'Bhopal', 'Bhubaneswar', 'Bidar', 'Bihar Sharif', 'Bikaner', 'Bilaspur', 'Bileipada', 'Boisar', 'Brajrajnagar', 'Bulandshahr', 'Bundi', 'Buxar', 'Byasanagar', 'Byrnihat', 'Chamarajanagar', 'Chandigarh', 'Chandrapur', 'Charkhi Dadri', 'Chengalpattu', 'Chennai', 'Chhal', 'Chhapra', 'Chikkaballapur', 'Chikkamagaluru', 'Chittoor', 'Chittorgarh', 'Churu', 'Coimbtore', 'Cuddalore', 'Cuttack', 'Damoh', 'Darbhanga', 'Dausa', 'Davanagere', 'Dehradun', 'Delhi', 'Dewas', 'Dhanbad', 'Dharuhera', 'Dharwad', 'Dholpur', 'Dhule', 'Dindigul', 'Dungarpur', 'Durgapur', 'Eloor', 'Ernakulam', 'Faridabad', 'Fatehabad', 'Firozabad', 'Gadag', 'Gandhinagar', 'Gangtok', 'Gaya', 'Ghaziabad', 'Gorakhpur', 'Greater_Noida', 'Gummidipoondi', 'Gurugram', 'Guwahati', 'Gwalior', 'Hajipur', 'Haldia', 'Hanumangarh', 'Hapur', 'Hassan', 'Haveri', 'Hisar', 'Hosur', 'Howrah', 'Hubballi', 'Hyderabad', 'Imphal', 'Indore', 'Jabalpur', 'Jaipur', 'Jaisalmer', 'Jalandhar', 'Jalgaon', 'Jalna', 'Jalore', 'Jhalawar', 'Jhansi', 'Jharsuguda', 'Jhunjhunu', 'Jind', 'Jodhpur', 'Jorapokhar', 'Kadapa', 'Kaithal', 'Kalaburgi', 'Kalyan', 'Kanchipuram', 'Kannur', 'Kanpur', 'Karauli', 'Karnal', 'Karur', 'Karwar', 'Kashipur', 'Katihar', 'Katni', 'Keonjhar', 'Khanna', 'Khurja', 'Kishanganj', 'Kochi', 'Kohima', 'Kolar', 'Kolhapur', 'Kolkata', 'Kollam', 'Koppal', 'Korba', 'Kota', 'Kozhikode', 'Kunjemura', 'Kurushketra', 'Latur', 'Loni_Ghaziabad', 'Lucknow', 'Ludhiana', 
'Madurai', 'Mahad', 'Maihar', 'Malegaon', 'Mandi Gobindgarh', 'Mandideep', 'Mandikhera', 'Manesar', 'Mangalore', 'Manguraha', 'Medikeri', 'Meerut', 'Milupara', 'Mira-Bhayandar', 'Moradabad', 'Motihari', 'Mumbai', 'Munger', 'Muzaffarnagar', 'Muzaffarpur', 'Mysuru', 'NOIDA', 'Nagaon', 'Nagapattinam', 'Nagaur', 'Nagpur', 'Naharlagun', 'Nalbari', 'Nanded', 'Nandesari', 'Narnaul', 'Nashik', 'Navi Mumbai', 'Nayagarh', 'Noida', 'Ooty', 'Pali', 'Palkalaiperur', 'Palwal', 'Panchkula', 'Panipat', 'Parbhani', 'Pathardih', 'Patiala', 'Patna', 'Pimpri-Chinchwad', 'Pithampur', 'Pratapgarh', 'Prayagraj', 'Puducherry', 'Pudukottai', 'Pune', 'Purnia', 'Raichur', 'Raipur', 'Rairangpur', 'Rajamahendravaram', 'Rajgir', 'Rajsamand', 'Ramanagara', 'Ramanathapuram', 'Ranipet', 'Ratlam', 'Rishikesh', 'Rohtak', 'Rourkela', 'Rupnagar', 'Sagar', 'Saharsa', 'Salem', 'Samastipur', 'Sangli', 'Sasaram', 'Satna', 'Sawai Madhopur', 'Shillong', 'Shivamogga', 'Sikar', 'Silchar', 'Siliguri', 'Singrauli', 'Sirohi', 'Sirsa', 'Sivasagar', 'Siwan', 'Solapur', 'Sonipat', 'Sri Ganganagar', 'Sri Vijaya Puram', 'Srinagar', 'Suakati', 'Surat', 'Talcher', 'Tensa', 'Thane', 'Thanjavur', 'Thiruvananthapuram', 'Thoothukudi', 'Thrissur', 'Tiruchirappalli', 'Tirunelveli', 'Tirupati', 'Tirupur', 'Tonk', 'Tumidih', 'Udaipur', 'Udupi', 'Ujjain', 'Ulhasnagar', 'Vapi', 'Varanasi', 'Vatva', 'Vellore', 'Vijayapura', 'Vijayawada', 'Virar', 'Virudhunagar', 'Visakhapatnam', 'Vrindavan', 'Yadgir', 'Yamunanagar']\n"
|
443 |
]
|
444 |
}
|
445 |
],
|
|
|
458 |
" \"Manglore\": \"Mangalore\",\n",
|
459 |
" \"Pimpri Chinchwad\": \"Pimpri-Chinchwad\",\n",
|
460 |
" \"Tumakuru\": \"Tumidih\",\n",
|
461 |
+
" \"Tirumala\": \"Tirupati\",\n",
|
462 |
" \"Tiruppur\": \"Tirupur\",\n",
|
463 |
" \"Yamuna Nagar\": \"Yamunanagar\",\n",
|
464 |
" \"vellore\": \"Vellore\" # duplicate, can map to itself or be handled separately\n",
|
|
|
477 |
},
|
478 |
{
|
479 |
"cell_type": "code",
|
480 |
+
"execution_count": 13,
|
481 |
"metadata": {},
|
482 |
"outputs": [
|
483 |
{
|
484 |
"data": {
|
485 |
"text/plain": [
|
486 |
"State\n",
|
487 |
+
"Andaman and Nicobar 6\n",
|
488 |
+
"Andhra Pradesh 11546\n",
|
489 |
+
"Arunachal Pradesh 614\n",
|
490 |
+
"Assam 5099\n",
|
491 |
+
"Bihar 28633\n",
|
492 |
+
"Chandigarh 1980\n",
|
493 |
+
"Chhattisgarh 5357\n",
|
494 |
+
"Delhi 3330\n",
|
495 |
+
"Gujarat 12195\n",
|
496 |
+
"Haryana 50177\n",
|
497 |
+
"Himachal Pradesh 1022\n",
|
498 |
+
"Jammu and Kashmir 822\n",
|
499 |
+
"Jharkhand 2076\n",
|
500 |
+
"Karnataka 35248\n",
|
501 |
+
"Kerala 11549\n",
|
502 |
+
"Madhya Pradesh 31326\n",
|
503 |
+
"Maharashtra 39193\n",
|
504 |
+
"Manipur 790\n",
|
505 |
+
"Meghalaya 1956\n",
|
506 |
+
"Mizoram 1535\n",
|
507 |
+
"Nagaland 1398\n",
|
508 |
+
"Odisha 12363\n",
|
509 |
+
"Puducherry 1433\n",
|
510 |
+
"Punjab 19676\n",
|
511 |
+
"Rajasthan 37729\n",
|
512 |
+
"Sikkim 812\n",
|
513 |
+
"Tamil Nadu 14080\n",
|
514 |
+
"Telangana 3322\n",
|
515 |
+
"Tripura 1442\n",
|
516 |
+
"Uttar Pradesh 41800\n",
|
517 |
+
"Uttarakhand 2156\n",
|
518 |
+
"West Bengal 15406\n",
|
519 |
"Name: count, dtype: int64"
|
520 |
]
|
521 |
},
|
522 |
+
"execution_count": 13,
|
523 |
"metadata": {},
|
524 |
"output_type": "execute_result"
|
525 |
}
|
|
|
633 |
" 'Solapur': 'Maharashtra',\n",
|
634 |
" 'Sonipat': 'Haryana',\n",
|
635 |
" 'Sri Ganganagar': 'Rajasthan',\n",
|
636 |
+
" 'Sri Vijaya Puram': 'Andaman and Nicobar',\n",
|
637 |
" 'Srinagar': 'Jammu and Kashmir',\n",
|
638 |
" 'Suakati': 'Odisha',\n",
|
639 |
" 'Surat': 'Gujarat',\n",
|
|
|
672 |
},
|
673 |
{
|
674 |
"cell_type": "code",
|
675 |
+
"execution_count": 14,
|
676 |
"metadata": {},
|
677 |
"outputs": [
|
678 |
{
|
|
|
735 |
"0 Agra 417 PM\\n2.5 Severe \n",
|
736 |
"1 Bengaluru 95 PM , PM\\n2.5 10 Satisfactory \n",
|
737 |
"\n",
|
738 |
+
" Based on number of monitoring stations Date State \n",
|
739 |
+
"0 1 2016-01-01 Uttar Pradesh \n",
|
740 |
+
"1 5 2016-01-01 Karnataka "
|
741 |
]
|
742 |
},
|
743 |
+
"execution_count": 14,
|
744 |
"metadata": {},
|
745 |
"output_type": "execute_result"
|
746 |
}
|
|
|
753 |
},
|
754 |
{
|
755 |
"cell_type": "code",
|
756 |
+
"execution_count": 15,
|
757 |
"metadata": {},
|
758 |
"outputs": [
|
759 |
{
|
760 |
"data": {
|
761 |
"text/plain": [
|
762 |
+
"396071"
|
763 |
]
|
764 |
},
|
765 |
+
"execution_count": 15,
|
766 |
"metadata": {},
|
767 |
"output_type": "execute_result"
|
768 |
}
|
|
|
773 |
},
|
774 |
{
|
775 |
"cell_type": "code",
|
776 |
+
"execution_count": 16,
|
777 |
"metadata": {},
|
778 |
"outputs": [
|
779 |
{
|
780 |
"name": "stdout",
|
781 |
"output_type": "stream",
|
782 |
"text": [
|
783 |
+
"396071\n",
|
784 |
+
"395213\n"
|
785 |
]
|
786 |
}
|
787 |
],
|
|
|
794 |
},
|
795 |
{
|
796 |
"cell_type": "code",
|
797 |
+
"execution_count": 17,
|
798 |
"metadata": {},
|
799 |
"outputs": [],
|
800 |
"source": [
|
|
|
804 |
],
|
805 |
"metadata": {
|
806 |
"kernelspec": {
|
807 |
+
"display_name": "Python 3",
|
808 |
"language": "python",
|
809 |
"name": "python3"
|
810 |
},
|
|
|
818 |
"name": "python",
|
819 |
"nbconvert_exporter": "python",
|
820 |
"pygments_lexer": "ipython3",
|
821 |
+
"version": "3.12.4"
|
822 |
}
|
823 |
},
|
824 |
"nbformat": 4,
|