File size: 3,206 Bytes
9f05e5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import re

import pandas as pd

# The 27 countries this script recognises, each with the city names that
# are treated as belonging to it.  Countries are listed alphabetically;
# both lookup tables below are derived from this single mapping.
_cities_by_country = {
    "Austria": ("Vienna", "Graz", "Linz"),
    "Belgium": ("Brussels", "Antwerp", "Ghent"),
    "Bulgaria": ("Sofia", "Plovdiv", "Varna"),
    "Croatia": ("Zagreb", "Split", "Rijeka"),
    "Cyprus": ("Nicosia", "Limassol", "Larnaca"),
    "Czech Republic": ("Prague", "Brno", "Ostrava"),
    "Denmark": ("Copenhagen", "Aarhus", "Odense"),
    "Estonia": ("Tallinn", "Tartu", "Narva"),
    "Finland": ("Helsinki", "Espoo", "Tampere"),
    "France": ("Paris", "Marseille", "Lyon"),
    "Germany": ("Berlin", "Munich", "Frankfurt"),
    "Greece": ("Athens", "Thessaloniki", "Patras"),
    "Hungary": ("Budapest", "Debrecen", "Szeged"),
    "Ireland": ("Dublin", "Cork", "Limerick"),
    "Italy": ("Rome", "Milan", "Naples"),
    "Latvia": ("Riga", "Daugavpils", "Liepāja"),
    "Lithuania": ("Vilnius", "Kaunas", "Klaipėda"),
    "Luxembourg": ("Luxembourg",),
    "Malta": ("Valletta", "Birkirkara", "Qormi"),
    "Netherlands": ("Amsterdam", "Rotterdam", "The Hague"),
    "Poland": ("Warsaw", "Krakow", "Lodz"),
    "Portugal": ("Lisbon", "Porto", "Vila Nova de Gaia"),
    "Romania": ("Bucharest", "Cluj-Napoca", "Timisoara"),
    "Slovakia": ("Bratislava", "Kosice", "Prešov"),
    "Slovenia": ("Ljubljana", "Maribor", "Celje"),
    "Spain": ("Madrid", "Barcelona", "Valencia"),
    "Sweden": ("Stockholm", "Gothenburg", "Malmo"),
}

# City name -> country name, flattened in country-then-city order.
city_to_country = {
    city: country
    for country, cities in _cities_by_country.items()
    for city in cities
}

# Country names as a list, in the same (alphabetical) order as above.
eu_countries = list(_cities_by_country)

# Load the input CSV into a DataFrame; its 'User Location' column is read
# further down to derive a 'Country' column.
df = pd.read_csv('data/2020-climate-all.csv')

def get_eu_country(location, countries=None, cities=None):
    """Return the country that *location* names directly or via a city.

    Country names are tried first, in the order given by *countries*; if
    none is found, city names are tried in the iteration order of
    *cities*.  A name only counts when it appears as a whole word or
    phrase, so "Cork" matches "Cork, Ireland" but not "Corkscrew Lane".
    Matching is case-sensitive.

    Args:
        location: Free-text location.  Any non-string value (for example
            ``None`` or a float NaN from a missing CSV cell) yields
            ``None``.
        countries: Iterable of country names to look for.  Defaults to
            the module-level ``eu_countries``.
        cities: Mapping of city name to country name.  Defaults to the
            module-level ``city_to_country``.

    Returns:
        The matched country name, or ``None`` when nothing matches.
    """
    if not isinstance(location, str):
        return None

    if countries is None:
        countries = eu_countries
    if cities is None:
        cities = city_to_country

    def mentions(name):
        # The plain substring test is a cheap necessary condition, so the
        # regex only runs for names that are actually present.
        if name not in location:
            return False
        # Lookarounds instead of \b so the check also works for names
        # that begin or end with a non-word character.
        whole_word = r"(?<!\w)" + re.escape(name) + r"(?!\w)"
        return re.search(whole_word, location) is not None

    # Check for an explicit country name first.
    for country in countries:
        if mentions(country):
            return country

    # Otherwise fall back to a known city.
    for city, country in cities.items():
        if mentions(city):
            return country

    return None

# Tag every row with the country resolved from its 'User Location' value
# (None when get_eu_country finds no match).
df['Country'] = df['User Location'].apply(get_eu_country)

# Drop rows whose 'Country' is missing or an empty string.
is_blank = df['Country'].isna() | df['Country'].eq('')
filtered_df = df[~is_blank]

# Write the remaining rows to the output CSV without the index column.
filtered_df.to_csv('data/2020-climate-eu.csv', index=False)