Upload standardize-eu.py
Browse files- standardize-eu.py +84 -0
standardize-eu.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
# Countries covered by this script, each with a few of its major cities.
# Insertion order (alphabetical by country) matters: both lookup tables below
# inherit it, and code that scans them in order returns the first hit.
_cities_by_country = {
    "Austria": ("Vienna", "Graz", "Linz"),
    "Belgium": ("Brussels", "Antwerp", "Ghent"),
    "Bulgaria": ("Sofia", "Plovdiv", "Varna"),
    "Croatia": ("Zagreb", "Split", "Rijeka"),
    "Cyprus": ("Nicosia", "Limassol", "Larnaca"),
    "Czech Republic": ("Prague", "Brno", "Ostrava"),
    "Denmark": ("Copenhagen", "Aarhus", "Odense"),
    "Estonia": ("Tallinn", "Tartu", "Narva"),
    "Finland": ("Helsinki", "Espoo", "Tampere"),
    "France": ("Paris", "Marseille", "Lyon"),
    "Germany": ("Berlin", "Munich", "Frankfurt"),
    "Greece": ("Athens", "Thessaloniki", "Patras"),
    "Hungary": ("Budapest", "Debrecen", "Szeged"),
    "Ireland": ("Dublin", "Cork", "Limerick"),
    "Italy": ("Rome", "Milan", "Naples"),
    "Latvia": ("Riga", "Daugavpils", "Liepāja"),
    "Lithuania": ("Vilnius", "Kaunas", "Klaipėda"),
    "Luxembourg": ("Luxembourg",),
    "Malta": ("Valletta", "Birkirkara", "Qormi"),
    "Netherlands": ("Amsterdam", "Rotterdam", "The Hague"),
    "Poland": ("Warsaw", "Krakow", "Lodz"),
    "Portugal": ("Lisbon", "Porto", "Vila Nova de Gaia"),
    "Romania": ("Bucharest", "Cluj-Napoca", "Timisoara"),
    "Slovakia": ("Bratislava", "Kosice", "Prešov"),
    "Slovenia": ("Ljubljana", "Maribor", "Celje"),
    "Spain": ("Madrid", "Barcelona", "Valencia"),
    "Sweden": ("Stockholm", "Gothenburg", "Malmo"),
}

# Flat city -> country lookup, in the same order as the table above.
city_to_country = {
    city: country
    for country, cities in _cities_by_country.items()
    for city in cities
}

# Country names only, in table order.
eu_countries = list(_cities_by_country)
|
61 |
+
|
62 |
+
# Read the input CSV into the module-level frame that the rest of the script
# tags and filters.
source_csv = 'data/2020-climate-all.csv'
df = pd.read_csv(source_csv)
|
63 |
+
|
64 |
+
def get_eu_country(location):
    """Return the EU country mentioned in a free-text location, or None.

    Country names from ``eu_countries`` are tried first, in list order, then
    city names from ``city_to_country`` in insertion order; the first name
    found wins.

    Matching is case-sensitive and whole-word: a name only counts when it is
    not embedded in a longer word. A plain substring test would tag
    "Corktown" as Ireland and "Romeoville" as Italy; this does not.

    NOTE: whole-word matching also means spelling variants such as "Milano"
    do not match "Milan". Add such variants to ``city_to_country`` if they
    should be recognised.

    Args:
        location: Raw location value. Anything that is not a ``str`` (for
            example a missing value) yields None.

    Returns:
        The matching country name, or None when nothing matches.
    """
    # Imported locally so the function does not depend on a module-level
    # import; ``re`` caches compiled patterns, so repeated calls stay cheap.
    import re

    if not isinstance(location, str):
        return None

    def mentions(name):
        # Reject hits glued to other word characters on either side.
        # re.escape keeps names such as "Cluj-Napoca" literal.
        pattern = r"(?<!\w)" + re.escape(name) + r"(?!\w)"
        return re.search(pattern, location) is not None

    # An explicit country name outranks any city hit.
    for country in eu_countries:
        if mentions(country):
            return country

    for city, country in city_to_country.items():
        if mentions(city):
            return country

    return None
|
78 |
+
|
79 |
+
# Tag every row with the country inferred from its free-text user location.
df['Country'] = df['User Location'].apply(get_eu_country)

# Keep only the rows whose 'Country' is neither missing nor blank.
has_country = df['Country'].notna()
is_not_blank = df['Country'] != ''
filtered_df = df.loc[has_country & is_not_blank]

# Write the filtered rows out, without the index column.
filtered_df.to_csv('data/2020-climate-eu.csv', index=False)
|