# climate-greta-effect / standardize-eu.py
# vibha-mah's picture
# Upload standardize-eu.py
# 9f05e5e
import re

import pandas as pd
# Major cities of each EU member state, keyed by country name.
# Order is significant: countries are listed alphabetically and the derived
# lookups below keep that order, which decides which name is tested first.
_cities_by_country = {
    "Austria": ("Vienna", "Graz", "Linz"),
    "Belgium": ("Brussels", "Antwerp", "Ghent"),
    "Bulgaria": ("Sofia", "Plovdiv", "Varna"),
    "Croatia": ("Zagreb", "Split", "Rijeka"),
    "Cyprus": ("Nicosia", "Limassol", "Larnaca"),
    "Czech Republic": ("Prague", "Brno", "Ostrava"),
    "Denmark": ("Copenhagen", "Aarhus", "Odense"),
    "Estonia": ("Tallinn", "Tartu", "Narva"),
    "Finland": ("Helsinki", "Espoo", "Tampere"),
    "France": ("Paris", "Marseille", "Lyon"),
    "Germany": ("Berlin", "Munich", "Frankfurt"),
    "Greece": ("Athens", "Thessaloniki", "Patras"),
    "Hungary": ("Budapest", "Debrecen", "Szeged"),
    "Ireland": ("Dublin", "Cork", "Limerick"),
    "Italy": ("Rome", "Milan", "Naples"),
    "Latvia": ("Riga", "Daugavpils", "Liepāja"),
    "Lithuania": ("Vilnius", "Kaunas", "Klaipėda"),
    "Luxembourg": ("Luxembourg",),
    "Malta": ("Valletta", "Birkirkara", "Qormi"),
    "Netherlands": ("Amsterdam", "Rotterdam", "The Hague"),
    "Poland": ("Warsaw", "Krakow", "Lodz"),
    "Portugal": ("Lisbon", "Porto", "Vila Nova de Gaia"),
    "Romania": ("Bucharest", "Cluj-Napoca", "Timisoara"),
    "Slovakia": ("Bratislava", "Kosice", "Prešov"),
    "Slovenia": ("Ljubljana", "Maribor", "Celje"),
    "Spain": ("Madrid", "Barcelona", "Valencia"),
    "Sweden": ("Stockholm", "Gothenburg", "Malmo"),
}

# Flat city -> country lookup, in the same order as the table above.
city_to_country = {
    city_name: country_name
    for country_name, city_names in _cities_by_country.items()
    for city_name in city_names
}

# The 27 EU member-state names, in table order.
eu_countries = list(_cities_by_country)
# Load the full 2020 climate dataset that the rest of the script filters.
source_csv = 'data/2020-climate-all.csv'
df = pd.read_csv(source_csv)
def _mentions_place(name, text):
    """Return True when *name* appears in *text* as a standalone word or phrase.

    A hit is rejected if the character immediately before or after it is a
    letter, so "Rome" is found in "Rome, Italy" or "Rome2020" but not in
    "Jerome" or "Romeoville". Matching is case-sensitive.
    """
    # [^\W\d_] means "any Unicode letter". The lookarounds only forbid a letter
    # neighbour, so digits, underscores, punctuation and emoji may touch the name.
    pattern = r"(?<![^\W\d_])" + re.escape(name) + r"(?![^\W\d_])"
    return re.search(pattern, text) is not None


def get_eu_country(location, *, countries=None, cities=None):
    """Map a free-text location to the EU country it mentions.

    Country names are tried first, in list order; if none is found, city
    names are tried in mapping order and the city's country is returned.
    Names must appear as whole words (see ``_mentions_place``), which avoids
    false positives such as "Jerome" -> Italy or "Corkscrew" -> Ireland that
    plain substring tests produce.

    Args:
        location: Location text. Any non-string value yields None.
        countries: Optional iterable of country names to look for; defaults
            to the module-level ``eu_countries``.
        cities: Optional mapping of city name -> country name; defaults to
            the module-level ``city_to_country``.

    Returns:
        The matched country name, or None when nothing matches.
    """
    if not isinstance(location, str):
        return None

    # Resolve defaults at call time so the module tables are only needed
    # when the caller does not supply their own.
    if countries is None:
        countries = eu_countries
    if cities is None:
        cities = city_to_country

    # An explicit country mention wins over any city mention.
    for country in countries:
        if _mentions_place(country, location):
            return country

    for city, country in cities.items():
        if _mentions_place(city, location):
            return country

    return None
# Tag every row with the EU country recognised in its free-text location.
locations = df['User Location']
df['Country'] = locations.apply(get_eu_country)

# Keep only rows that were tagged (neither missing nor an empty string).
country_col = df['Country']
is_tagged = (country_col != '') & country_col.notna()
filtered_df = df[is_tagged]

# Write the EU-only subset without the index column.
filtered_df.to_csv('data/2020-climate-eu.csv', index=False)