vibha-mah committed on
Commit
9f05e5e
·
1 Parent(s): 19fe72c

Upload standardize-eu.py

Browse files
Files changed (1) hide show
  1. standardize-eu.py +84 -0
standardize-eu.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

import pandas as pd
2
+
3
# Lookup table from city name to the EU member state it belongs to.
# get_eu_country() walks this mapping in insertion order and returns the
# country of the first city it finds, so the order of entries is significant
# when a location mentions more than one city.
#
# Cities whose names are commonly written both with and without diacritics
# are listed under both spellings so either form is recognised.
city_to_country = {
    # Austria
    "Vienna": "Austria", "Graz": "Austria", "Linz": "Austria",
    # Belgium
    "Brussels": "Belgium", "Antwerp": "Belgium", "Ghent": "Belgium",
    # Bulgaria
    "Sofia": "Bulgaria", "Plovdiv": "Bulgaria", "Varna": "Bulgaria",
    # Croatia
    "Zagreb": "Croatia", "Split": "Croatia", "Rijeka": "Croatia",
    # Cyprus
    "Nicosia": "Cyprus", "Limassol": "Cyprus", "Larnaca": "Cyprus",
    # Czech Republic
    "Prague": "Czech Republic", "Brno": "Czech Republic", "Ostrava": "Czech Republic",
    # Denmark
    "Copenhagen": "Denmark", "Aarhus": "Denmark", "Odense": "Denmark",
    # Estonia
    "Tallinn": "Estonia", "Tartu": "Estonia", "Narva": "Estonia",
    # Finland
    "Helsinki": "Finland", "Espoo": "Finland", "Tampere": "Finland",
    # France
    "Paris": "France", "Marseille": "France", "Lyon": "France",
    # Germany
    "Berlin": "Germany", "Munich": "Germany", "Frankfurt": "Germany",
    # Greece
    "Athens": "Greece", "Thessaloniki": "Greece", "Patras": "Greece",
    # Hungary
    "Budapest": "Hungary", "Debrecen": "Hungary", "Szeged": "Hungary",
    # Ireland
    "Dublin": "Ireland", "Cork": "Ireland", "Limerick": "Ireland",
    # Italy
    "Rome": "Italy", "Milan": "Italy", "Naples": "Italy",
    # Latvia
    "Riga": "Latvia", "Daugavpils": "Latvia",
    "Liepāja": "Latvia", "Liepaja": "Latvia",
    # Lithuania
    "Vilnius": "Lithuania", "Kaunas": "Lithuania",
    "Klaipėda": "Lithuania", "Klaipeda": "Lithuania",
    # Luxembourg
    "Luxembourg": "Luxembourg",
    # Malta
    "Valletta": "Malta", "Birkirkara": "Malta", "Qormi": "Malta",
    # Netherlands
    "Amsterdam": "Netherlands", "Rotterdam": "Netherlands", "The Hague": "Netherlands",
    # Poland
    "Warsaw": "Poland",
    "Krakow": "Poland", "Kraków": "Poland",
    "Lodz": "Poland", "Łódź": "Poland",
    # Portugal
    "Lisbon": "Portugal", "Porto": "Portugal", "Vila Nova de Gaia": "Portugal",
    # Romania
    "Bucharest": "Romania", "Cluj-Napoca": "Romania",
    "Timisoara": "Romania", "Timișoara": "Romania",
    # Slovakia
    "Bratislava": "Slovakia",
    "Kosice": "Slovakia", "Košice": "Slovakia",
    "Prešov": "Slovakia", "Presov": "Slovakia",
    # Slovenia
    "Ljubljana": "Slovenia", "Maribor": "Slovenia", "Celje": "Slovenia",
    # Spain
    "Madrid": "Spain", "Barcelona": "Spain", "Valencia": "Spain",
    # Sweden
    "Stockholm": "Sweden", "Gothenburg": "Sweden",
    "Malmo": "Sweden", "Malmö": "Sweden",
}

# Country names recognised directly in a location string. Kept as a list
# because get_eu_country() checks them in this order and returns the first hit.
eu_countries = [
    "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic",
    "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
    "Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta",
    "Netherlands", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia",
    "Spain", "Sweden",
]
61
+
62
# Load the full tweet dataset that the rest of the script filters.
source_csv = 'data/2020-climate-all.csv'
df = pd.read_csv(source_csv)
63
+
64
# Compiled whole-name patterns, keyed by the name they match. The lookup
# tables are small and fixed, so this cache stays bounded.
_NAME_PATTERNS = {}


def _mentions(text, name):
    """Return True if *name* occurs in *text* and is not part of a longer word."""
    pattern = _NAME_PATTERNS.get(name)
    if pattern is None:
        # [^\W\d_] matches a single letter. Rejecting a letter on either side
        # stops "Paris" matching inside "Parish" or "Rome" inside "Romeo",
        # while still accepting "#Rome", "Berlin_2020" or "Paris,France".
        pattern = re.compile(
            r"(?<![^\W\d_])" + re.escape(name) + r"(?![^\W\d_])"
        )
        _NAME_PATTERNS[name] = pattern
    return pattern.search(text) is not None


def get_eu_country(location, countries=None, cities=None):
    """Return the EU country mentioned in a free-text location, or None.

    Country names are checked first, in order; if none is found, city names
    are checked in the mapping's iteration order and the matching city's
    country is returned. Matching is case-sensitive and requires the name to
    stand on its own rather than inside a longer word.

    Args:
        location: The location text. Any non-string value (for example a
            missing value read from a CSV) yields None.
        countries: Optional iterable of country names to look for. Defaults
            to the module-level ``eu_countries``.
        cities: Optional mapping of city name to country name. Defaults to
            the module-level ``city_to_country``.

    Returns:
        The matched country name, or None when nothing matches.
    """
    if not isinstance(location, str):
        return None

    # Fall back to the module-level tables when no override is supplied.
    if countries is None:
        countries = eu_countries
    if cities is None:
        cities = city_to_country

    # check for country first
    for country in countries:
        if _mentions(location, country):
            return country

    for city, country in cities.items():
        if _mentions(location, city):
            return country

    return None
78
+
79
# Tag each row with the EU country detected in its 'User Location' text;
# rows with no match get a missing value.
df['Country'] = df['User Location'].apply(get_eu_country)

# filter rows where 'Country' is not blank
has_country = df['Country'].notna()
is_not_empty = df['Country'] != ''
filtered_df = df[has_country & is_not_empty]

# Write the EU-only subset without the DataFrame index column.
filtered_df.to_csv('data/2020-climate-eu.csv', index=False)