# gaia/data_pipelines/historical_weather_data.py
# Akram Sanad
# visualization is done
# b126e08
import openmeteo_requests
import matplotlib.pyplot as plt
import pandas as pd
import requests_cache
from retry_requests import retry
from compute_et0_adjusted import compute_et0
def download_historical_weather_data(
    latitude: float,
    longitude: float,
    start_year: int,
    end_year: int,
) -> pd.DataFrame:
    """Fetch hourly weather records from the Open-Meteo archive endpoint.

    The request window runs from 8 February of ``start_year`` to 22 February
    of ``end_year`` (both dates are fixed in the query) and uses the "GMT"
    timezone. Location metadata from the first response is printed to stdout.

    Args:
        latitude: Latitude sent to the API.
        longitude: Longitude sent to the API.
        start_year: Year used to build the ``start_date`` of the query.
        end_year: Year used to build the ``end_date`` of the query.

    Returns:
        A DataFrame indexed by UTC timestamps with one column per requested
        hourly variable: ``temperature_2m``, ``relative_humidity_2m``,
        ``precipitation``, ``et0_fao_evapotranspiration``, ``wind_speed_10m``
        and ``shortwave_radiation``.
    """
    # HTTP session backed by an on-disk cache ('.cache', expire_after=-1) and
    # wrapped so that failed requests are retried up to 5 times with backoff.
    http_cache = requests_cache.CachedSession('.cache', expire_after=-1)
    resilient_session = retry(http_cache, retries=5, backoff_factor=0.2)
    client = openmeteo_requests.Client(session=resilient_session)

    # Declared once: the response exposes variables by position, in the same
    # order they were requested, so this tuple drives both the query and the
    # positional lookup further down.
    variable_names = (
        "temperature_2m", "relative_humidity_2m",
        "precipitation", "et0_fao_evapotranspiration",
        "wind_speed_10m", "shortwave_radiation",
    )
    query = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": f"{start_year}-02-08",
        "end_date": f"{end_year}-02-22",
        "hourly": list(variable_names),
        "timezone": "GMT",
    }

    # Only the first location/model of the reply is processed.
    location = client.weather_api(
        "https://archive-api.open-meteo.com/v1/archive",
        params=query,
    )[0]
    print(f"Coordinates {location.Latitude()}°N {location.Longitude()}°E")
    print(f"Elevation {location.Elevation()} m asl")
    print(f"Timezone {location.Timezone()} {location.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {location.UtcOffsetSeconds()} s")

    # Pull each variable's values out by its position in the request.
    hourly = location.Hourly()
    series_by_name = {
        name: hourly.Variables(position).ValuesAsNumpy()
        for position, name in enumerate(variable_names)
    }

    # Rebuild the time axis from the reported start, end and step; the end
    # timestamp itself is excluded (inclusive="left").
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left"
    )
    return pd.DataFrame(index=timestamps, data=series_by_name)
def aggregate_hourly_weather_data(
hourly_data: pd.DataFrame,
) -> pd.DataFrame:
resampled_data = hourly_data.resample("1ME").agg({
"temperature_2m": ["min", "max", "mean"],
"relative_humidity_2m": ["min", "max"],
"wind_speed_10m": "mean",
"precipitation": "mean",
"shortwave_radiation": "mean",
"et0_fao_evapotranspiration": "mean",
})
monthly_data = pd.DataFrame.from_dict({
"day_of_year": resampled_data.index.dayofyear,
"air_temperature_min": resampled_data[("temperature_2m", "min")],
"air_temperature_max": resampled_data[("temperature_2m", "max")],
"air_temperature_mean": resampled_data[("temperature_2m", "mean")],
"relative_humidity_min": resampled_data[("relative_humidity_2m", "min")],
"relative_humidity_max": resampled_data[("relative_humidity_2m", "max")],
"precipitation": resampled_data[("precipitation", "mean")],
"wind_speed": resampled_data[("wind_speed_10m", "mean")],
"irradiance": resampled_data[("shortwave_radiation", "mean")],
"et0_fao_evapotranspiration": resampled_data[("et0_fao_evapotranspiration", "mean")],
})
return monthly_data
def _run_demo() -> None:
    """Download, aggregate and plot evapotranspiration for a sample site.

    Fetches hourly data for latitude 47 / longitude 3 with start year 2000
    and end year 2024, aggregates it per month, stores the result of
    ``compute_et0`` in an ``et0`` column, and draws that column together with
    the API-provided ``et0_fao_evapotranspiration`` column.
    """
    site_latitude, site_longitude = 47, 3
    first_year, last_year = 2000, 2024

    hourly = download_historical_weather_data(
        site_latitude, site_longitude, first_year, last_year
    )
    monthly = aggregate_hourly_weather_data(hourly)
    monthly["et0"] = compute_et0(monthly, site_latitude, site_longitude)

    # NOTE(review): the x100 scaling and +5 offset look like ad-hoc
    # adjustments to compare the two curves visually — confirm intent.
    api_curve = monthly["et0_fao_evapotranspiration"] * 100
    computed_curve = monthly["et0"] + 5
    plt.plot(api_curve)
    plt.plot(computed_curve)
    plt.show()


if __name__ == '__main__':
    _run_demo()