Hugo Massonnat committed on
Commit
5f29f14
·
1 Parent(s): 6a1c12b

monthly aggregation of historic weather data

Browse files
data_pipelines/historical_weather_data.py CHANGED
@@ -4,17 +4,19 @@ import requests_cache
4
  import pandas as pd
5
  from retry_requests import retry
6
 
 
 
7
 
8
  def download_historical_weather_data(
9
- latitude: float,
10
- longitude: float,
11
- start_year: int,
12
- end_year: int,
13
  ) -> pd.DataFrame:
14
  # Setup the Open-Meteo API client with cache and retry on error
15
- cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
16
- retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
17
- openmeteo = openmeteo_requests.Client(session = retry_session)
18
 
19
  # Make sure all required weather variables are listed here
20
  # The order of variables in hourly or daily is important to assign them correctly below
@@ -22,12 +24,11 @@ def download_historical_weather_data(
22
  params = {
23
  "latitude": latitude,
24
  "longitude": longitude,
25
- "start_date": f"{start_year}-01-01",
26
- "end_date": f"{end_year}-12-31",
27
- "daily": ["temperature_2m_max", "temperature_2m_min", "temperature_2m_mean",
28
- "precipitation_sum", "precipitation_hours",
29
- "shortwave_radiation_sum",
30
- "et0_fao_evapotranspiration"],
31
  "timezone": "GMT"
32
  }
33
  responses = openmeteo.weather_api(url, params=params)
@@ -39,32 +40,68 @@ def download_historical_weather_data(
39
  print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
40
  print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
41
 
42
- # Process daily data. The order of variables needs to be the same as requested.
43
- daily = response.Daily()
44
- daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
45
- daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
46
- daily_temperature_2m_mean = daily.Variables(2).ValuesAsNumpy()
47
- daily_precipitation_sum = daily.Variables(3).ValuesAsNumpy()
48
- daily_precipitation_hours = daily.Variables(4).ValuesAsNumpy()
49
- daily_shortwave_radiation_sum = daily.Variables(5).ValuesAsNumpy()
50
- daily_et0_fao_evapotranspiration = daily.Variables(6).ValuesAsNumpy()
51
-
52
- daily_data = {
53
- "date": pd.date_range(
54
- start=pd.to_datetime(daily.Time(), unit="s", utc=True),
55
- end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
56
- freq=pd.Timedelta(seconds=daily.Interval()),
 
 
 
 
 
 
 
57
  inclusive="left"
58
  ),
59
- "temperature_2m_max": daily_temperature_2m_max,
60
- "temperature_2m_min": daily_temperature_2m_min,
61
- "temperature_2m_mean": daily_temperature_2m_mean,
62
- "precipitation_sum": daily_precipitation_sum,
63
- "precipitation_hours": daily_precipitation_hours,
64
- "shortwave_radiation_sum": daily_shortwave_radiation_sum,
65
- "et0_fao_evapotranspiration": daily_et0_fao_evapotranspiration
66
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- daily_dataframe = pd.DataFrame(data = daily_data)
 
 
 
 
 
 
69
 
70
- return daily_dataframe
 
 
 
4
  import pandas as pd
5
  from retry_requests import retry
6
 
7
+ from compute_et0_adjusted import compute_et0_future
8
+
9
 
10
  def download_historical_weather_data(
11
+ latitude: float,
12
+ longitude: float,
13
+ start_year: int,
14
+ end_year: int,
15
  ) -> pd.DataFrame:
16
  # Setup the Open-Meteo API client with cache and retry on error
17
+ cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
18
+ retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
19
+ openmeteo = openmeteo_requests.Client(session=retry_session)
20
 
21
  # Make sure all required weather variables are listed here
22
  # The order of variables in hourly or daily is important to assign them correctly below
 
24
  params = {
25
  "latitude": latitude,
26
  "longitude": longitude,
27
+ "start_date": f"{start_year}-02-08",
28
+ "end_date": f"{end_year}-02-22",
29
+ "hourly": ["temperature_2m", "relative_humidity_2m",
30
+ "precipitation", "et0_fao_evapotranspiration",
31
+ "wind_speed_10m", "shortwave_radiation"],
 
32
  "timezone": "GMT"
33
  }
34
  responses = openmeteo.weather_api(url, params=params)
 
40
  print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
41
  print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
42
 
43
+ # Process hourly data. The order of variables needs to be the same as requested.
44
+ hourly = response.Hourly()
45
+ hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
46
+ hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
47
+ hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
48
+ hourly_et0_fao_evapotranspiration = hourly.Variables(3).ValuesAsNumpy()
49
+ hourly_wind_speed_10m = hourly.Variables(4).ValuesAsNumpy()
50
+ hourly_shortwave_radiation = hourly.Variables(5).ValuesAsNumpy()
51
+
52
+ hourly_data = {
53
+ "temperature_2m": hourly_temperature_2m,
54
+ "relative_humidity_2m": hourly_relative_humidity_2m,
55
+ "precipitation": hourly_precipitation,
56
+ "et0_fao_evapotranspiration": hourly_et0_fao_evapotranspiration,
57
+ "wind_speed_10m": hourly_wind_speed_10m,
58
+ "shortwave_radiation": hourly_shortwave_radiation,
59
+ }
60
+ hourly_dataframe = pd.DataFrame(
61
+ index=pd.date_range(
62
+ start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
63
+ end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
64
+ freq=pd.Timedelta(seconds=hourly.Interval()),
65
  inclusive="left"
66
  ),
67
+ data=hourly_data,
68
+ )
69
+
70
+ return hourly_dataframe
71
+
72
+
73
def aggregate_hourly_weather_data(
    hourly_data: pd.DataFrame,
) -> pd.DataFrame:
    """Aggregate hourly weather observations into one row per calendar month.

    Args:
        hourly_data: Frame indexed by a ``DatetimeIndex`` that contains at
            least the columns ``temperature_2m``, ``relative_humidity_2m``,
            ``wind_speed_10m`` and ``shortwave_radiation``. Any other column
            is ignored.

    Returns:
        A frame that keeps the month-end index produced by the resampling
        and has the columns ``month``, ``year``, ``air_temperature_min``,
        ``air_temperature_max``, ``relative_humidity_min``,
        ``relative_humidity_max``, ``wind_speed`` and ``irradiance``.
        Temperature and humidity are the monthly minimum/maximum of the
        hourly values; wind speed and irradiance are monthly means.
    """
    try:
        # "ME" (month end) is the alias introduced in pandas 2.2.
        monthly_bins = hourly_data.resample("ME")
    except ValueError:
        # Older pandas releases reject "ME" as an invalid frequency and only
        # know the month-end alias as "M".
        monthly_bins = hourly_data.resample("M")

    resampled_data = monthly_bins.agg({
        "temperature_2m": ["min", "max"],
        "relative_humidity_2m": ["min", "max"],
        "wind_speed_10m": "mean",
        "shortwave_radiation": "mean",
    })

    # The aggregation yields two-level (variable, statistic) column labels;
    # flatten them into descriptive single-level names. "month" and "year"
    # are plain index-derived arrays, aligned positionally with the rows.
    monthly_data = pd.DataFrame.from_dict({
        "month": resampled_data.index.month,
        "year": resampled_data.index.year,
        "air_temperature_min": resampled_data[("temperature_2m", "min")],
        "air_temperature_max": resampled_data[("temperature_2m", "max")],
        "relative_humidity_min": resampled_data[("relative_humidity_2m", "min")],
        "relative_humidity_max": resampled_data[("relative_humidity_2m", "max")],
        "wind_speed": resampled_data[("wind_speed_10m", "mean")],
        "irradiance": resampled_data[("shortwave_radiation", "mean")],
    })

    return monthly_data
95
+
96
 
97
if __name__ == '__main__':
    # Manual smoke run: download hourly data for a fixed location, aggregate
    # it per month and print the ET0 value computed for every monthly row.
    latitude = 47
    longitude = 3
    start_year = 2020
    end_year = 2021
    df = download_historical_weather_data(latitude, longitude, start_year, end_year)
    monthly_df = aggregate_hourly_weather_data(df)

    # Iterate over the rows directly instead of indexing by position.
    for _, monthly_row in monthly_df.iterrows():
        et0 = compute_et0_future(monthly_row, latitude, longitude)
        print(et0)
requirements.txt CHANGED
@@ -16,4 +16,5 @@ openmeteo_requests
16
  requests_cache
17
  retry_requests
18
  fuzzywuzzy
19
- plotly
 
 
16
  requests_cache
17
  retry_requests
18
  fuzzywuzzy
19
+ plotly
20
+ pvlib