gaia / data_pipelines /historical_weather_data.py
Hugo Massonnat
pipeline for historical weather data
b23cefb
raw
history blame
2.77 kB
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
def download_historical_weather_data(
    latitude: float,
    longitude: float,
    start_year: int,
    end_year: int,
) -> pd.DataFrame:
    """Download daily historical weather data for one location from Open-Meteo.

    Queries the Open-Meteo archive endpoint for every day from January 1st of
    ``start_year`` through December 31st of ``end_year`` and returns the
    requested daily variables as a DataFrame.

    Side effects: HTTP responses are cached through a ``requests_cache``
    session named ``'.cache'`` that never expires, failed requests are retried
    (5 retries, backoff factor 0.2), and the coordinates, elevation and
    timezone information of the response are printed to stdout.

    Args:
        latitude: Latitude of the location, forwarded to the API as-is.
        longitude: Longitude of the location, forwarded to the API as-is.
        start_year: First year of the requested period (inclusive).
        end_year: Last year of the requested period (inclusive).

    Returns:
        A DataFrame with a timezone-aware (UTC) ``date`` column followed by
        one column per requested daily variable: ``temperature_2m_max``,
        ``temperature_2m_min``, ``temperature_2m_mean``,
        ``precipitation_sum``, ``precipitation_hours``,
        ``shortwave_radiation_sum`` and ``et0_fao_evapotranspiration``.

    Raises:
        ValueError: If ``start_year`` is greater than ``end_year``.
    """
    # Fail fast on an inverted range instead of building the sessions and
    # sending a request that cannot describe a valid period.
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not be after end_year ({end_year})"
        )

    # Single source of truth for the requested daily variables. The API
    # returns values positionally, in the order they were requested, so the
    # same sequence drives both the request and the column assignment below.
    daily_variables = (
        "temperature_2m_max",
        "temperature_2m_min",
        "temperature_2m_mean",
        "precipitation_sum",
        "precipitation_hours",
        "shortwave_radiation_sum",
        "et0_fao_evapotranspiration",
    )

    # Setup the Open-Meteo API client with cache and retry on error.
    # NOTE(review): expire_after=-1 keeps cached responses forever; if
    # end_year can be the current year, an incomplete year may stay cached
    # -- confirm this is intended.
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": f"{start_year}-01-01",
        "end_date": f"{end_year}-12-31",
        "daily": list(daily_variables),
        "timezone": "GMT",
    }
    responses = openmeteo.weather_api(url, params=params)

    # Only one location is requested, so only the first response is processed.
    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    daily = response.Daily()
    daily_data = {
        # Rebuild the timestamps from the start, end and step (in seconds)
        # reported by the response; the end bound is exclusive.
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left",
        ),
    }
    # Variables come back in request order; map each index to its name.
    for index, variable_name in enumerate(daily_variables):
        daily_data[variable_name] = daily.Variables(index).ValuesAsNumpy()

    return pd.DataFrame(data=daily_data)