Hugo Massonnat committed on
Commit
b23cefb
·
1 Parent(s): a972258

pipeline for historical weather data

Browse files
data_pipelines/historical_weather_data.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openmeteo_requests
2
+
3
+ import requests_cache
4
+ import pandas as pd
5
+ from retry_requests import retry
6
+
7
+
8
def download_historical_weather_data(
    latitude: float,
    longitude: float,
    start_year: int,
    end_year: int,
) -> pd.DataFrame:
    """Download daily historical weather data from the Open-Meteo archive API.

    The request covers January 1st of ``start_year`` through December 31st of
    ``end_year`` (both inclusive) with the timezone set to GMT. Responses are
    stored in a local ``.cache`` requests-cache session and failed requests
    are retried up to 5 times.

    Basic metadata about the resolved location (coordinates, elevation,
    timezone) is printed to stdout.

    Args:
        latitude: Latitude of the location, passed to the API as-is.
        longitude: Longitude of the location, passed to the API as-is.
        start_year: First year of the requested period.
        end_year: Last year of the requested period.

    Returns:
        A DataFrame with one row per time step and the columns ``date``
        (timezone-aware, UTC), ``temperature_2m_max``, ``temperature_2m_min``,
        ``temperature_2m_mean``, ``precipitation_sum``,
        ``precipitation_hours``, ``shortwave_radiation_sum`` and
        ``et0_fao_evapotranspiration``.

    Raises:
        ValueError: If ``start_year`` is after ``end_year``.
    """
    # Fail fast on an inverted range instead of sending a request that cannot
    # succeed through the retrying, caching session.
    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) must not be after end_year ({end_year})"
        )

    # Single source of truth for the requested variables: the API returns the
    # daily series in the order they were requested, so the same list drives
    # the request, the positional lookups and the DataFrame column names.
    daily_variables = [
        "temperature_2m_max",
        "temperature_2m_min",
        "temperature_2m_mean",
        "precipitation_sum",
        "precipitation_hours",
        "shortwave_radiation_sum",
        "et0_fao_evapotranspiration",
    ]

    # Setup the Open-Meteo API client with cache and retry on error.
    cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": f"{start_year}-01-01",
        "end_date": f"{end_year}-12-31",
        "daily": daily_variables,
        "timezone": "GMT",
    }
    responses = openmeteo.weather_api(url, params=params)

    # Only a single location is requested, so only the first response is used.
    response = responses[0]
    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    daily = response.Daily()

    # Rebuild the time axis from the response's start, end and step size;
    # inclusive="left" drops the end timestamp itself.
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left",
        ),
    }
    # Variables are addressed by their position in the request.
    for index, name in enumerate(daily_variables):
        daily_data[name] = daily.Variables(index).ValuesAsNumpy()

    return pd.DataFrame(data=daily_data)
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mistralai==1.5.0
2
+ gradio==5.15.0
3
+ ipykernel==6.29.5
4
+ fiona==1.10.1
5
+ chardet==5.2.0
6
+ crewai==0.100.0
7
+ crewai-tools==0.33.0
8
+ pydantic==2.10.6
9
+ openmeteo-requests==1.3.0
10
+ requests-cache==1.2.1
11
+ retry-requests==2.0.0
12
+ plotly==6.0.0
13
+ nbformat>=4.2.0
14
+ pandas
15
+ openmeteo_requests
16
+ requests_cache
17
+ retry_requests