import functools
import re

import pandas as pd


# Lookup table from city name to the EU member state it belongs to.
# Insertion order matters to callers that return the first matching city,
# so entries stay grouped by country in alphabetical country order.
# Cities whose local spelling uses diacritics are listed in both the accented
# and the plain-ASCII form, since either may show up in user-entered text.
city_to_country = {
    # Austria
    "Vienna": "Austria", "Graz": "Austria", "Linz": "Austria",
    # Belgium
    "Brussels": "Belgium", "Antwerp": "Belgium", "Ghent": "Belgium",
    # Bulgaria
    "Sofia": "Bulgaria", "Plovdiv": "Bulgaria", "Varna": "Bulgaria",
    # Croatia
    "Zagreb": "Croatia", "Split": "Croatia", "Rijeka": "Croatia",
    # Cyprus
    "Nicosia": "Cyprus", "Limassol": "Cyprus", "Larnaca": "Cyprus",
    # Czech Republic
    "Prague": "Czech Republic", "Brno": "Czech Republic",
    "Ostrava": "Czech Republic",
    # Denmark
    "Copenhagen": "Denmark", "Aarhus": "Denmark", "Odense": "Denmark",
    # Estonia
    "Tallinn": "Estonia", "Tartu": "Estonia", "Narva": "Estonia",
    # Finland
    "Helsinki": "Finland", "Espoo": "Finland", "Tampere": "Finland",
    # France
    "Paris": "France", "Marseille": "France", "Lyon": "France",
    # Germany
    "Berlin": "Germany", "Munich": "Germany", "Frankfurt": "Germany",
    # Greece
    "Athens": "Greece", "Thessaloniki": "Greece", "Patras": "Greece",
    # Hungary
    "Budapest": "Hungary", "Debrecen": "Hungary", "Szeged": "Hungary",
    # Ireland
    "Dublin": "Ireland", "Cork": "Ireland", "Limerick": "Ireland",
    # Italy
    "Rome": "Italy", "Milan": "Italy", "Naples": "Italy",
    # Latvia
    "Riga": "Latvia", "Daugavpils": "Latvia",
    "Liepāja": "Latvia", "Liepaja": "Latvia",
    # Lithuania
    "Vilnius": "Lithuania", "Kaunas": "Lithuania",
    "Klaipėda": "Lithuania", "Klaipeda": "Lithuania",
    # Luxembourg (the city shares the country's name)
    "Luxembourg": "Luxembourg",
    # Malta
    "Valletta": "Malta", "Birkirkara": "Malta", "Qormi": "Malta",
    # Netherlands
    "Amsterdam": "Netherlands", "Rotterdam": "Netherlands",
    "The Hague": "Netherlands",
    # Poland
    "Warsaw": "Poland",
    "Krakow": "Poland", "Kraków": "Poland",
    "Lodz": "Poland", "Łódź": "Poland",
    # Portugal
    "Lisbon": "Portugal", "Porto": "Portugal",
    "Vila Nova de Gaia": "Portugal",
    # Romania
    "Bucharest": "Romania", "Cluj-Napoca": "Romania",
    "Timisoara": "Romania", "Timișoara": "Romania",
    # Slovakia
    "Bratislava": "Slovakia",
    "Kosice": "Slovakia", "Košice": "Slovakia",
    "Prešov": "Slovakia", "Presov": "Slovakia",
    # Slovenia
    "Ljubljana": "Slovenia", "Maribor": "Slovenia", "Celje": "Slovenia",
    # Spain
    "Madrid": "Spain", "Barcelona": "Spain", "Valencia": "Spain",
    # Sweden
    "Stockholm": "Sweden", "Gothenburg": "Sweden",
    "Malmo": "Sweden", "Malmö": "Sweden",
}


# The 27 EU member states, in alphabetical order. The spellings match the
# country values used in city_to_country.
eu_countries = [
    "Austria",
    "Belgium",
    "Bulgaria",
    "Croatia",
    "Cyprus",
    "Czech Republic",
    "Denmark",
    "Estonia",
    "Finland",
    "France",
    "Germany",
    "Greece",
    "Hungary",
    "Ireland",
    "Italy",
    "Latvia",
    "Lithuania",
    "Luxembourg",
    "Malta",
    "Netherlands",
    "Poland",
    "Portugal",
    "Romania",
    "Slovakia",
    "Slovenia",
    "Spain",
    "Sweden",
]


# Load the complete 2020 climate dataset. The 'User Location' column of this
# frame is what the script later inspects to decide which rows to keep.
df = pd.read_csv('data/2020-climate-all.csv')


# Runs of letters/digits; underscores and all punctuation act as separators.
_WORD_RE = re.compile(r"[^\W_]+")


def _normalize_place(text):
    """Return *text* casefolded, with separators collapsed to single spaces.

    The result is padded with one space on each side so that a padded name
    can be located with a plain substring test that only hits whole words.
    """
    return " " + " ".join(_WORD_RE.findall(text.casefold())) + " "


@functools.lru_cache(maxsize=None)
def _eu_place_needles():
    """Return ``(normalized_name, country)`` pairs in lookup-priority order.

    Country names come first (in ``eu_countries`` order), followed by cities
    (in ``city_to_country`` order). The table is built once on first use, so
    changes made to those two containers afterwards are not picked up.
    """
    places = [(country, country) for country in eu_countries]
    places.extend(city_to_country.items())
    return tuple((_normalize_place(name), country) for name, country in places)


def get_eu_country(location):
    """Return the EU country mentioned in *location*, or ``None``.

    Args:
        location: A location value. Anything that is not a ``str`` (for
            example a missing value) is treated as "no match".

    Returns:
        The first entry of ``eu_countries`` that occurs in *location*;
        failing that, the country of the first ``city_to_country`` key that
        occurs in it; otherwise ``None``.

    Names are matched case-insensitively and only as whole words, so
    "berlin, germany" is recognised while "Corktown" does not count as
    "Cork". Matching is purely textual: a non-EU place that contains an EU
    name as a separate word still resolves to the EU country.
    """
    if not isinstance(location, str):
        return None

    haystack = _normalize_place(location)
    for needle, country in _eu_place_needles():
        if needle in haystack:
            return country

    return None


# Tag each row with the EU country derived from its 'User Location' value;
# rows where get_eu_country finds nothing receive None.
df['Country'] = df['User Location'].apply(get_eu_country)

# Keep only the rows that were assigned a country. The empty-string check is
# purely defensive: get_eu_country returns None, not '', when nothing matches.
filtered_df = df[df['Country'].notna() & (df['Country'] != '')]

# Save the EU-only subset without writing the DataFrame index as a column.
filtered_df.to_csv('data/2020-climate-eu.csv', index=False)