Spaces:
Runtime error
Runtime error
Hugo Massonnat
committed on
Commit
·
b23cefb
1
Parent(s):
a972258
pipeline for historical weather data
Browse files- data_pipelines/historical_weather_data.py +70 -0
- requirements.txt +17 -0
data_pipelines/historical_weather_data.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import openmeteo_requests
|
2 |
+
|
3 |
+
import requests_cache
|
4 |
+
import pandas as pd
|
5 |
+
from retry_requests import retry
|
6 |
+
|
7 |
+
|
8 |
+
def download_historical_weather_data(
    latitude: float,
    longitude: float,
    start_year: int,
    end_year: int,
) -> pd.DataFrame:
    """Download daily historical weather for one location from Open-Meteo.

    The request covers ``{start_year}-01-01`` through ``{end_year}-12-31`` in
    the "GMT" timezone and is sent to the Open-Meteo archive endpoint.
    Location metadata from the first response (coordinates, elevation,
    timezone, UTC offset) is printed to stdout.

    Args:
        latitude: Latitude forwarded to the API as the ``latitude`` parameter.
        longitude: Longitude forwarded to the API as the ``longitude``
            parameter.
        start_year: First year of the period; the request starts on 1 January.
        end_year: Last year of the period; the request ends on 31 December.

    Returns:
        A DataFrame with a UTC-aware ``date`` column followed by one column
        per requested daily variable, in request order.
    """
    # Single source of truth for the daily variables. The client exposes the
    # results by position only, so the same ordered tuple drives the request,
    # the positional lookup and the column names.
    variable_names = (
        "temperature_2m_max",
        "temperature_2m_min",
        "temperature_2m_mean",
        "precipitation_sum",
        "precipitation_hours",
        "shortwave_radiation_sum",
        "et0_fao_evapotranspiration",
    )

    # API client on top of a cached session (expire_after=-1) that is wrapped
    # with retries (5 attempts, backoff factor 0.2).
    client = openmeteo_requests.Client(
        session=retry(
            requests_cache.CachedSession('.cache', expire_after=-1),
            retries=5,
            backoff_factor=0.2,
        )
    )

    query = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": f"{start_year}-01-01",
        "end_date": f"{end_year}-12-31",
        "daily": list(variable_names),
        "timezone": "GMT",
    }
    results = client.weather_api(
        "https://archive-api.open-meteo.com/v1/archive",
        params=query,
    )

    # Only one location is requested, so only the first response is used.
    location = results[0]
    print(f"Coordinates {location.Latitude()}°N {location.Longitude()}°E")
    print(f"Elevation {location.Elevation()} m asl")
    print(f"Timezone {location.Timezone()} {location.TimezoneAbbreviation()}")
    print(f"Timezone difference to GMT+0 {location.UtcOffsetSeconds()} s")

    daily_block = location.Daily()
    series = [
        daily_block.Variables(position).ValuesAsNumpy()
        for position in range(len(variable_names))
    ]

    # Time axis: evenly spaced from Time() up to, but not including, TimeEnd().
    step = pd.Timedelta(seconds=daily_block.Interval())
    first_stamp = pd.to_datetime(daily_block.Time(), unit="s", utc=True)
    last_stamp = pd.to_datetime(daily_block.TimeEnd(), unit="s", utc=True)

    columns = {
        "date": pd.date_range(
            start=first_stamp,
            end=last_stamp,
            freq=step,
            inclusive="left",
        )
    }
    columns.update(zip(variable_names, series))

    return pd.DataFrame(data=columns)
|
requirements.txt
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
mistralai==1.5.0
|
2 |
+
gradio==5.15.0
|
3 |
+
ipykernel==6.29.5
|
4 |
+
fiona==1.10.1
|
5 |
+
chardet==5.2.0
|
6 |
+
crewai==0.100.0
|
7 |
+
crewai-tools==0.33.0
|
8 |
+
pydantic==2.10.6
|
9 |
+
openmeteo-requests==1.3.0
|
10 |
+
requests-cache==1.2.1
|
11 |
+
retry-requests==2.0.0
|
12 |
+
plotly==6.0.0
|
13 |
+
nbformat>=4.2.0
|
14 |
+
pandas
|
15 |
+
openmeteo_requests
|
16 |
+
requests_cache
|
17 |
+
retry_requests
|