Update app.py
app.py (CHANGED)
@@ -2,35 +2,120 @@ import requests
 import pandas as pd
 from io import StringIO
 import streamlit as st
-import os
 import plotly.express as px
 import plotly.graph_objects as go
-import plotly.colors as pc
 import numpy as np
-from sklearn.metrics import mean_squared_error
 from statsmodels.tsa.stattools import acf
 from statsmodels.graphics.tsaplots import plot_acf
 import matplotlib.pyplot as plt
-from datetime import datetime
 import folium
-import seaborn as sns
 from streamlit_folium import st_folium
-
 from entsoe.geo import load_zones
-from branca.colormap import LinearColormap
 import branca
-def
-    now = datetime.now()
-    current_hour = now.hour
-    current_minute = now.minute
-    # Return the hour and a boolean indicating if it is after the 10th minute
-    return current_hour, current_minute >= 10

-
-@st.cache_data(show_spinner=False)
-def load_GitHub(github_token, file_name, hour, after_10_min):
     url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
     headers = {'Authorization': f'token {github_token}'}

@@ -42,102 +127,52 @@ def load_GitHub(github_token, file_name, hour, after_10_min):
     if 'Date' in df.columns:
         df['Date'] = pd.to_datetime(df['Date'])  # Convert 'Date' column to datetime
         df.set_index('Date', inplace=True)  # Set 'Date' column as the index
-
     else:
         print(f"Failed to download {file_name}. Status code: {response.status_code}")
         return None
-
-@st.cache_data(show_spinner=False)
-def load_forecast(github_token, hour, after_10_min):
-    predictions_dict = {}
-    for hour in range(24):
-        file_name = f'Predictions_{hour}h.csv'
-        df = load_GitHub(github_token, file_name, hour, after_10_min)
-        if df is not None:
-            predictions_dict[file_name] = df
-    return predictions_dict
-
-def convert_European_time(data, time_zone):
-    data.index = pd.to_datetime(data.index, utc=True)
-    data.index = data.index.tz_convert(time_zone)
-    data.index = data.index.tz_localize(None)
-    return data

-def
 }
-
-    df.columns = df.columns.str.replace(original, simplified, regex=True)
-    return df

 github_token = st.secrets["GitHub_Token_KUL_Margarida"]

 if github_token:
-    Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', hour, after_10_min)
-    Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv', hour, after_10_min)
-    Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv', hour, after_10_min)
-    Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv', hour, after_10_min)
-    Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
-    Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
-    Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
-    Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min)
-    Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min)
-    Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min)
-    Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min)
-    Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min)
-    Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min)
-    Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min)
-    Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min)
-
-    Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
-    Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
-    Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
-    Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
-    Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
-    Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
-    Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
-    Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome')
-    Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome')
-    Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome')
-    Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome')
-    Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome')
-    Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome')
-    Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen')
-    Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen')
-

 else:
     print("Please enter your GitHub Personal Access Token to proceed.")

-
-col1, col2 = st.columns([5, 2])  # Adjust the ratio to better fit your layout needs
 with col1:
     st.title("Transparency++")

|
|
150 |
with col2_2:
|
151 |
st.image("energyville_logo.png", width=100)
|
152 |
|
153 |
-
|
154 |
-
st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**")
|
155 |
-
|
156 |
-
upper_space.markdown("""
|
157 |
-
|
158 |
-
|
159 |
-
""", unsafe_allow_html=True)
|
160 |
-
|
161 |
-
countries = {
|
162 |
-
'Overall': 'Overall',
|
163 |
-
'Austria': 'AT',
|
164 |
-
'Belgium': 'BE',
|
165 |
-
'Denmark 1': 'DK_1',
|
166 |
-
'Denmark 2': 'DK_2',
|
167 |
-
'France': 'FR',
|
168 |
-
'Germany-Luxembourg': 'DE_LU',
|
169 |
-
'Italy Calabria': 'IT_CALA',
|
170 |
-
'Italy Central North': 'IT_CNOR',
|
171 |
-
'Italy Central South': 'IT_CSUD',
|
172 |
-
'Italy North': 'IT_NORD',
|
173 |
-
'Italy Sicily': 'IT_SICI',
|
174 |
-
'Italy South': 'IT_SUD',
|
175 |
-
'Netherlands': 'NL',
|
176 |
-
'Portugal': 'PT',
|
177 |
-
'Spain': 'ES',
|
178 |
-
}
|
179 |
-
|
180 |
-
data_dict = {
|
181 |
-
'BE': Data_BE,
|
182 |
-
'FR': Data_FR,
|
183 |
-
'DE_LU': Data_DE,
|
184 |
-
'NL': Data_NL,
|
185 |
-
'PT': Data_PT,
|
186 |
-
'AT': Data_AT,
|
187 |
-
'ES': Data_ES,
|
188 |
-
'IT_CALA': Data_IT_CALA,
|
189 |
-
'IT_CNOR': Data_IT_CNOR,
|
190 |
-
'IT_CSUD': Data_IT_CSUD,
|
191 |
-
'IT_NORD': Data_IT_NORD,
|
192 |
-
'IT_SICI': Data_IT_SICI,
|
193 |
-
'IT_SUD': Data_IT_SUD,
|
194 |
-
'DK_1': Data_DK_1,
|
195 |
-
'DK_2': Data_DK_2,
|
196 |
-
}
|
197 |
-
|
198 |
-
countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2']
|
199 |
-
countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',]
|
200 |
-
|
201 |
-
installed_capacities = {
|
202 |
-
'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
|
203 |
-
'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
|
204 |
-
'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
|
205 |
-
'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
|
206 |
-
'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
|
207 |
-
'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
|
208 |
-
'AT': { 'Solar': 7294, 'Wind Onshore': 4021 },
|
209 |
-
'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112},
|
210 |
-
'DK_2': { 'Solar': 992, 'Wind Offshore': 1045, 'Wind Onshore': 748},
|
211 |
-
}
|
212 |
-
|
213 |
-
forecast_columns_all_RES = [
|
214 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
215 |
-
|
216 |
-
forecast_columns_no_wind_offshore = [
|
217 |
-
'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
|
218 |
-
|
219 |
|
220 |
st.sidebar.header('Filters')
|
221 |
|
222 |
st.sidebar.subheader("Select Country")
|
223 |
st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
|
|
|
|
|
224 |
|
225 |
-
selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
|
226 |
-
|
227 |
-
# Sidebar with radio buttons for different sections
|
228 |
if selected_country != 'Overall':
|
229 |
st.sidebar.subheader("Section")
|
230 |
st.sidebar.caption("Select the type of information you want to explore.")
|
231 |
-
section = st.sidebar.radio('
|
232 |
else:
|
233 |
section = None # No section is shown when "Overall" is selected
|
234 |
|
@@ -236,124 +205,172 @@ if selected_country == 'Overall':
|
|
236 |
data = None # You can set data to None or a specific dataset based on your logic
|
237 |
section = None # No section selected when "Overall" is chosen
|
238 |
else:
|
239 |
-
country_code =
|
240 |
-
data = data_dict.get(
|
241 |
-
if country_code in countries_all_RES:
|
242 |
-
forecast_columns = forecast_columns_all_RES
|
243 |
-
elif country_code in countries_no_offshore:
|
244 |
-
forecast_columns = forecast_columns_no_wind_offshore
|
245 |
-
if country_code == 'BE':
|
246 |
-
weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
|
247 |
-
data['Temperature'] = data['temperature_2m_8']
|
248 |
-
data['Wind Speed Onshore'] = data['wind_speed_100m_8']
|
249 |
-
data['Wind Speed Offshore'] = data['wind_speed_100m_4']
|
250 |
-
else:
|
251 |
-
weather_columns = ['Temperature', 'Wind Speed']
|
252 |
-
data['Temperature'] = data['temperature_2m']
|
253 |
-
data['Wind Speed'] = data['wind_speed_100m']
|
254 |
-
|
255 |
|
256 |
if section == 'Data Quality':
|
257 |
-
|
258 |
st.header('Data Quality')
|
259 |
-
|
260 |
-
st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
|
261 |
|
262 |
-
|
|
|
|
|
263 |
|
264 |
-
|
265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
|
267 |
-
|
268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
missing_values = missing_values.round(2)
|
270 |
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
|
|
|
|
|
|
312 |
|
313 |
-
|
|
|
|
|
|
|
|
|
|
|
314 |
|
315 |
-
st.
|
316 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
317 |
|
318 |
elif section == 'Forecasts Quality':
|
319 |
-
|
320 |
st.header('Forecast Quality')
|
321 |
|
322 |
# Time series for last 1 week
|
323 |
last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
|
324 |
st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
|
325 |
-
|
326 |
-
# Options for selecting the data to display
|
327 |
-
if country_code in countries_all_RES:
|
328 |
-
variable_options = {
|
329 |
-
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
330 |
-
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
331 |
-
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
332 |
-
"Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
|
333 |
-
}
|
334 |
-
elif country_code in countries_no_offshore:
|
335 |
-
variable_options = {
|
336 |
-
"Load": ("Load_entsoe", "Load_forecast_entsoe"),
|
337 |
-
"Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
|
338 |
-
"Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
|
339 |
-
}
|
340 |
-
else:
|
341 |
-
print('Country code doesnt correspond.')
|
342 |
-
|
343 |
# Dropdown to select the variable
|
344 |
selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
|
345 |
-
|
346 |
-
# Get the corresponding columns for the selected variable
|
347 |
actual_col, forecast_col = variable_options[selected_variable]
|
348 |
|
349 |
-
|
|
|
|
|
|
|
|
|
350 |
fig = go.Figure()
|
351 |
-
fig.add_trace(go.Scatter(x=
|
352 |
-
fig.add_trace(go.Scatter(x=
|
353 |
-
fig.update_layout(title=f
|
354 |
-
|
355 |
st.plotly_chart(fig)
|
356 |
|
|
|
357 |
# Scatter plots for error distribution
|
358 |
st.subheader('Error Distribution')
|
359 |
st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
|
@@ -362,19 +379,24 @@ elif section == 'Forecasts Quality':
     # Get the corresponding columns for the selected variable
     actual_col, forecast_col = variable_options[selected_variable]

-
     st.plotly_chart(fig)
-
     st.subheader('Accuracy Metrics (Sorted by rMAE):')

     date_range = st.date_input(

@@ -388,99 +410,92 @@ elif section == 'Forecasts Quality':
     else:
         st.error("Please select a valid date range.")
         st.stop()
-
-    output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. "
     st.write(output_text)

-
-    if country_code in countries_all_RES:
-        accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
-    elif country_code in countries_no_offshore:
-        accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
-    else:
-        print('Country code doesnt correspond.')

-    for
-    actual_col =
-        row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
-        accuracy_metrics.loc[row_label] = [mae, rmae]

     accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
-    accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
     accuracy_metrics = accuracy_metrics.round(4)

-    col1, col2 = st.columns([1,

     with col1:
         st.markdown(
             """
             <style>
-            .small-chart {
-                margin-top:
             }
             </style>
             """,
             unsafe_allow_html=True
         )
         st.dataframe(accuracy_metrics)
-        st.markdown(
-            """
-            <style>
-            .small-chart {
-                margin-top: -30px; /* Adjust this value as needed */
-            }
-            </style>
-            """,
-            unsafe_allow_html=True
-        )

     with col2:
-
-        # Prepare data for the radar chart
         rmae_values = accuracy_metrics['rMAE'].tolist()
-        categories
-
-        fig

         fig.update_layout(
-            width=
-            height=
-            margin=dict(
             polar=dict(
                 radialaxis=dict(
                     visible=True,
-                    range=[0, max(rmae_values)
-                )
             showlegend=False
         )
-
-        #
-        st.

     st.subheader('ACF plots of Errors')
     st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')

@@ -504,7 +519,7 @@ elif section == 'Forecasts Quality':

     # Optionally calculate and store ACF values for further analysis if needed
     acf_values = acf(error.dropna(), nlags=240)
-
 elif section == 'Insights':
     st.header("Insights")

@@ -516,23 +531,15 @@ elif section == 'Insights':

     # Resample data based on the selected resolution
     if resolution == 'Hourly':
-        resampled_data =
     elif resolution == 'Daily':
-        resampled_data =

-    # Select the necessary columns for the scatter plot
-    if country_code in countries_all_RES:
-        selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
-    elif country_code in countries_no_offshore:
-        selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
-    else:
-        print('Country code doesnt correspond.')

-
-    selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]

     # Drop missing values
-    selected_df =

     # Create the scatter plots using seaborn's pairplot
     sns.set_theme(style="ticks")

@@ -543,30 +550,24 @@ elif section == 'Insights':

 elif selected_country == 'Overall':

-    def get_forecast_columns(country_code):
-        if country_code in countries_all_RES:
-            return forecast_columns_all_RES
-        elif country_code in countries_no_offshore:
-            return forecast_columns_no_wind_offshore
-        else:
-            print('Country code doesnt correspond.')
-
     def calculate_net_load_error(df, country_code):
-
-        filter_df = df
         # Calculate the error based on the latest values
         error = (net_load_forecast - net_load).iloc[-1]
         date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format

@@ -574,29 +575,41 @@ elif selected_country == 'Overall':
         return error, date

     def plot_net_load_error_map(data_dict):
-        #
         df_net_load_error = pd.DataFrame({
        })

-        #
         date = pd.Timestamp.now()
-

-        #

-        # Map country codes to country names
-        countries_code_to_name = {v: k for k, v in countries.items()}
-        geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name)

         # Merge net_load_error and date into geo_data
         geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')

@@ -633,7 +646,7 @@ elif selected_country == 'Overall':
             geo_data,
             style_function=style_function,
             tooltip=folium.GeoJsonTooltip(
-                fields=["
                 aliases=["Country:", "Net Load Error [MW]:", "Date:"],
                 localize=True
             )

@@ -643,7 +656,7 @@ elif selected_country == 'Overall':
         colormap.add_to(m)

         # Display the map
-        _

     def calculate_mae(actual, forecast):
         return np.mean(np.abs(actual - forecast))

@@ -651,40 +664,36 @@ elif selected_country == 'Overall':
     def calculate_persistence_mae(data, shift_hours):
         return np.mean(np.abs(data - data.shift(shift_hours)))

-    def calculate_rmae_for_country(df):
         rmae = {}
         rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
-
-            rmae['Wind_offshore'] = calculate_mae(df['Wind_offshore_entsoe'], df['Wind_offshore_forecast_entsoe']) / calculate_persistence_mae(df['Wind_offshore_entsoe'], 24)
-        else:
-            rmae['Wind_offshore'] = None # Mark as None if not applicable
-
-        rmae['Solar'] = calculate_mae(df['Solar_entsoe'], df['Solar_forecast_entsoe']) / calculate_persistence_mae(df['Solar_entsoe'], 24)
         return rmae

     def create_rmae_dataframe(data_dict):

-        rmae_values = {'Country': [], 'Load': [], '

         for country_name, df in data_dict.items():

             rmae_values['Country'].append(country_name)
-            rmae_values['Load'].append(rmae['Load'])
-            rmae_values['Wind_onshore'].append(rmae['Wind_onshore'])
-            rmae_values['Solar'].append(rmae['Solar'])

-                rmae_values['Wind_offshore'].append(rmae['Wind_offshore'])
-            else:
-                rmae_values['Wind_offshore'].append(np.nan) # Insert NaN for countries without offshore wind

         return pd.DataFrame(rmae_values)

@@ -692,10 +701,14 @@ elif selected_country == 'Overall':
         fig = go.Figure()

         # Dynamically adjust angles to exclude Wind_offshore if all values are NaN
-        angles = ['Load'
-        if not rmae_df['
-        angles.append('
-
         for _, row in rmae_df.iterrows():
             fig.add_trace(go.Scatterpolar(
                 r=[row[angle] for angle in angles],

@@ -735,7 +748,3 @@ elif selected_country == 'Overall':

         # Plot radar chart for the selected countries
         plot_rmae_radar_chart(filtered_rmae_df)
-
-
-
-
app.py, updated version of the changed hunks (added lines marked with +):

 import pandas as pd
 from io import StringIO
 import streamlit as st
 import plotly.express as px
 import plotly.graph_objects as go
 import numpy as np
 from statsmodels.tsa.stattools import acf
 from statsmodels.graphics.tsaplots import plot_acf
 import matplotlib.pyplot as plt
 import folium
 from streamlit_folium import st_folium
+import seaborn as sns
+import datetime
 from entsoe.geo import load_zones
 import branca
+import pytz
+import time
+from entsoe import EntsoePandasClient
+import geopandas as gpd
+
+
+tz = pytz.timezone('Europe/Brussels')
+
+def load_capacity_csv(path: str) -> dict:
+    """Load installed capacities CSV into a dict: Country -> {tech: value} """
+    df = pd.read_csv(path, index_col='Country')
+    # Ensure numeric and handle missing
+    df = df.replace({"NaN": np.nan}).astype(float)
+    return df.to_dict(orient='index')
+
+# Load installed capacities from CSV files
+installed_capacities_2024 = load_capacity_csv('installed_capacities_2024.csv')
+installed_capacities_2025 = load_capacity_csv('installed_capacities_2025.csv')
+
TECHS = ['Solar', 'Wind Offshore', 'Wind Onshore']
|
37 |
+
#countries = [ 'AT', 'BE', 'NL', 'BG', 'HR', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
|
38 |
+
#'EE', 'FI', 'FR', 'GR', 'HU', 'IT_CALA', 'IT_CNOR',
|
39 |
+
#'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD', 'LV', 'LT',
|
40 |
+
#'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5', 'PL', 'PT', 'RO',
|
41 |
+
#'SE_1', 'SE_2', 'SE_3', 'SE_4', 'RS', 'SK', 'SI', 'ES', 'CH', 'ME','IE_SEM','MK','CY','BA','AL','XK']
|
42 |
+
|
43 |
+
countries = ['AT', 'BE', 'DE_LU', 'DK_1', 'DK_2', 'FR', 'IT_CALA', 'IT_CNOR',
|
44 |
+
'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
|
45 |
+
'NL', 'PT', 'ES']
|
46 |
+
|
47 |
+
def get_time_zone(country_code):
|
48 |
+
|
49 |
+
tz_map = {
|
50 |
+
'AL': 'Europe/Tirane',
|
51 |
+
'AT': 'Europe/Vienna',
|
52 |
+
'BE': 'Europe/Brussels',
|
53 |
+
'BA': 'Europe/Sarajevo',
|
54 |
+
'BG': 'Europe/Sofia',
|
55 |
+
'HR': 'Europe/Zagreb',
|
56 |
+
'CY': 'Asia/Nicosia',
|
57 |
+
'CZ': 'Europe/Prague',
|
58 |
+
'DE_LU': 'Europe/Berlin',
|
59 |
+
'DK_1': 'Europe/Copenhagen',
|
60 |
+
'DK_2': 'Europe/Copenhagen',
|
61 |
+
'EE': 'Europe/Tallinn',
|
62 |
+
'FI': 'Europe/Helsinki',
|
63 |
+
'MK': 'Europe/Skopje',
|
64 |
+
'FR': 'Europe/Paris',
|
65 |
+
'GR': 'Europe/Athens',
|
66 |
+
'HU': 'Europe/Budapest',
|
67 |
+
'IS': 'Atlantic/Reykjavik',
|
68 |
+
'IE_SEM': 'Europe/Dublin',
|
69 |
+
'IT_CALA': 'Europe/Rome',
|
70 |
+
'IT_CNOR': 'Europe/Rome',
|
71 |
+
'IT_CSUD': 'Europe/Rome',
|
72 |
+
'IT_NORD': 'Europe/Rome',
|
73 |
+
'IT_SARD': 'Europe/Rome',
|
74 |
+
'IT_SICI': 'Europe/Rome',
|
75 |
+
'IT_SUD': 'Europe/Rome',
|
76 |
+
'LV': 'Europe/Riga',
|
77 |
+
'LT': 'Europe/Vilnius',
|
78 |
+
'ME': 'Europe/Podgorica',
|
79 |
+
'NL': 'Europe/Amsterdam',
|
80 |
+
'NO_1': 'Europe/Oslo',
|
81 |
+
'NO_2': 'Europe/Oslo',
|
82 |
+
'NO_3': 'Europe/Oslo',
|
83 |
+
'NO_4': 'Europe/Oslo',
|
84 |
+
'NO_5': 'Europe/Oslo',
|
85 |
+
'PL': 'Europe/Warsaw',
|
86 |
+
'PT': 'Europe/Lisbon',
|
87 |
+
'MD': 'Europe/Chisinau',
|
88 |
+
'RO': 'Europe/Bucharest',
|
89 |
+
'SE_1': 'Europe/Stockholm',
|
90 |
+
'SE_2': 'Europe/Stockholm',
|
91 |
+
'SE_3': 'Europe/Stockholm',
|
92 |
+
'SE_4': 'Europe/Stockholm',
|
93 |
+
'RS': 'Europe/Belgrade',
|
94 |
+
'SK': 'Europe/Bratislava',
|
95 |
+
'SI': 'Europe/Ljubljana',
|
96 |
+
'ES': 'Europe/Madrid',
|
97 |
+
'CH': 'Europe/Zurich',
|
98 |
+
'XK': 'Europe/Rome'
|
99 |
+
}
|
100 |
+
if country_code in tz_map:
|
101 |
+
return tz_map[country_code]
|
102 |
+
else:
|
103 |
+
raise ValueError(f"Time zone for country code {country_code} is not defined.")
|
104 |
+
|
105 |
+
def convert_European_time(data, bdz):
|
106 |
+
time_zone = get_time_zone(bdz)
|
107 |
+
data.index = pd.to_datetime(data.index, utc=True)
|
108 |
+
data.index = data.index.tz_convert(time_zone)
|
109 |
+
data.index = data.index.tz_localize(None)
|
110 |
+
return data
|
111 |
|
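As a quick illustration (not part of the change itself), convert_European_time reinterprets a UTC index as local wall-clock time for the given bidding zone and then drops the timezone information:

# Minimal sketch, assuming a single UTC timestamp:
#   idx = pd.to_datetime(['2024-06-01 10:00'])             # stored as UTC in the source CSVs
#   df  = pd.DataFrame({'Load_entsoe': [100.0]}, index=idx)
#   convert_European_time(df, 'BE').index[0]
#   -> Timestamp('2024-06-01 12:00:00')                    # Brussels is UTC+2 in June; tz info removed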
+def filter_dataframe(df):
+    allowed_columns = {"Load_entsoe", "Load_forecast_entsoe", "Solar_entsoe", "Solar_forecast_entsoe", "Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe", "Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"}
+    return df[[col for col in df.columns if col in allowed_columns]]

+def load_GitHub(github_token, bdz):

+    file_name=f'{bdz}_Entsoe_UTC.csv'
     url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
     headers = {'Authorization': f'token {github_token}'}

     if 'Date' in df.columns:
         df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
         df.set_index('Date', inplace=True) # Set 'Date' column as the index
+        df=filter_dataframe(df)
+        df=convert_European_time(df, bdz)
+        return df[df.index >= pd.Timestamp('2024-01-01')]
     else:
         print(f"Failed to download {file_name}. Status code: {response.status_code}")
         return None

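In short, load_GitHub now returns a per-zone frame that is already column-filtered, converted to local time, and truncated to 2024 onwards; a hedged usage sketch:

#   df_be = load_GitHub(github_token, 'BE')
#   # -> DataFrame indexed by local (Brussels) time from 2024-01-01 on, restricted to the
#   #    Load/Solar/Wind actual and forecast columns, or None if the download fails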
+def filter_variable_options(df):
+    all_options = {
+        "Load": ("Load_entsoe", "Load_forecast_entsoe"),
+        "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
+        "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
+        "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"),
     }

+    variable_options = {}
+    flagged_columns = []

+    for key, (col1, col2) in all_options.items():
+        col1_exists = col1 in df.columns and not df[col1].isna().all()
+        col2_exists = col2 in df.columns and not df[col2].isna().all()
+        if col1_exists and col2_exists:
+            variable_options[key] = (col1, col2)
+        elif not col1_exists and col2_exists:
+            flagged_columns.append(col1)
+        elif col1_exists and not col2_exists:
+            flagged_columns.append(col2)
+        elif not col1_exists and not col2_exists:
+            flagged_columns.append(col1)
+            flagged_columns.append(col2)
+    return variable_options, flagged_columns

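For a zone whose offshore-wind columns are entirely NaN (an inland bidding zone, say), the helper would behave roughly as follows:

#   variable_options, flagged_columns = filter_variable_options(df)
#   variable_options -> {'Load': (...), 'Solar': (...), 'Wind Onshore': (...)}
#   flagged_columns  -> ['Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe']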
 github_token = st.secrets["GitHub_Token_KUL_Margarida"]
+#countries = ['IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_SARD', 'PT', 'FR']

 if github_token:
+    data_dict = {}
+    for bdz in countries:
+        df = load_GitHub(github_token, bdz)
+        if df is not None:
+            data_dict[bdz] = df

 else:
     print("Please enter your GitHub Personal Access Token to proceed.")

+col1, col2 = st.columns([5, 2])
 with col1:
     st.title("Transparency++")

     with col2_2:
         st.image("energyville_logo.png", width=100)

+st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for ENTSO-E member countries.**")

 st.sidebar.header('Filters')

 st.sidebar.subheader("Select Country")
 st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
+selection = ['Overall'] + list(countries)
+selected_country = st.sidebar.selectbox('Select Country', selection)

 if selected_country != 'Overall':
     st.sidebar.subheader("Section")
     st.sidebar.caption("Select the type of information you want to explore.")
+    section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
 else:
     section = None  # No section is shown when "Overall" is selected

     data = None  # You can set data to None or a specific dataset based on your logic
     section = None  # No section selected when "Overall" is chosen
 else:
+    country_code = selected_country
+    data = data_dict.get(selected_country)

 if section == 'Data Quality':
     st.header('Data Quality')

+    # Determine if capacities missing per year
+    caps4 = installed_capacities_2024.get(country_code)
+    caps5 = installed_capacities_2025.get(country_code)

+    st.write(
+        "The table below presents the data quality metrics focusing on the percentage "
+        "of missing values and the occurrence of extreme or nonsensical values for "
+        "the selected country. Additionally, it flags any mismatch between installed "
+        "capacity (NaN or 0) and actual data in the dataset."
+    )
+
+    # Determine end of data slice (yesterday 23:59:59)
+    yesterday = datetime.datetime.now(tz).date() - datetime.timedelta(days=1)
+    end_time = pd.Timestamp(yesterday).replace(hour=23, minute=59, second=59)
+    # Filter data
+    data_quality = data[data.index <= end_time]
+
+    tech_cols = {
+        'Load': ('Load_entsoe', 'Load_forecast_entsoe'),
+        'Wind Onshore': ('Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe'),
+        'Wind Offshore': ('Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe'),
+        'Solar': ('Solar_entsoe', 'Solar_forecast_entsoe'),
+    }

+    skip_cols = []
+
+    for tech_key, (act_col, fct_col) in tech_cols.items():
+        # only proceed if the columns are in the DataFrame
+        if act_col in data_quality.columns and fct_col in data_quality.columns:
+            # get installed capacities for 2024 & 2025
+            cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
+            cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
+
+            # if both years are missing or zero capacity
+            if (pd.isna(cap4) or cap4 == 0) and (pd.isna(cap5) or cap5 == 0):
+                act = data_quality[act_col]
+                fct = data_quality[fct_col]
+                # check if actual AND forecast are entirely zero or NaN
+                only_zero_or_na = (act.fillna(0) == 0).all() and (fct.fillna(0) == 0).all()
+                if only_zero_or_na:
+                    skip_cols += [act_col, fct_col]
+
+    # drop any columns flagged for skipping (ignore errors if somehow missing)
+    if skip_cols:
+        data_quality = data_quality.drop(columns=skip_cols, errors='ignore')
+
+    # Compute missing
+    missing_values = data_quality.isna().mean() * 100
     missing_values = missing_values.round(2)

+    extreme_values = {}
+    capacity_mismatch = {}
+    neg_counts = {}
+    over_counts = {}
+    cutoff = pd.Timestamp('2025-01-01')
+
+    # Iterate over columns
+    for col in data_quality.columns:
+        # Identify technology
+        if 'Solar' in col:
+            tech_key = 'Solar'
+        elif 'Wind_onshore' in col:
+            tech_key = 'Wind Onshore'
+        elif 'Wind_offshore' in col:
+            tech_key = 'Wind Offshore'
+        elif 'Load' in col:
+            tech_key = 'Load'
+        else:
+            extreme_values[col] = np.nan
+            capacity_mismatch[col] = np.nan
+            continue
+
+        series = data_quality[col]
+        # Year masks
+        mask_2024 = series.index < cutoff
+        # Fetch capacity values
+        cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
+        cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
+        print('var:',col)
+        print('cap4:',cap4)
+        if tech_key == 'Load':
+            # Negative load
+            extreme_pct = round((series < 0).mean() * 100, 2)
+            mismatch = np.nan
+        else:
+            # Create per-timestamp capacity
+            cap_series = pd.Series(
+                np.where(mask_2024, cap4, cap5),
+                index=series.index
+            )
+            # Flags
+            neg = series < 0
+            over = (series > cap_series) & cap_series.notna()
+            nonsense = neg | over
+            extreme_pct = round(nonsense.mean() * 100, 2)
+            # Mismatch: non-zero gen when cap missing or zero
+            # cap4, cap5 are floats or NaN
+            no_cap_2024 = pd.isna(cap4) or (cap4 == 0)
+            no_cap_2025 = pd.isna(cap5) or (cap5 == 0)
+
+            # check if there's at least one actual non-zero (treat NaN as 0)
+            has_nonzero = (series.fillna(0) != 0).any()
+
+            if no_cap_2024 and no_cap_2025 and has_nonzero:
+                mismatch = 100.0
+            else:
+                mismatch = 0.0

+        extreme_values[col] = extreme_pct
+        capacity_mismatch[col] = mismatch
+
+    display_extreme = {col: f"{val:.2f}" if not pd.isna(val) else ''
+                       for col, val in extreme_values.items()}
+    display_mismatch = {}
+    for col, val in capacity_mismatch.items():
+        if 'Load' in col:
+            display_mismatch[col] = '-'
+        else:
+            display_mismatch[col] = '🚩' if val == 100.0 else ''

+    # Build and render DataFrame
+    metrics_df = pd.DataFrame({
+        'Missing Values (%)': missing_values,
+        'Extreme/Nonsensical Values (%)': pd.Series(display_extreme),
+        'Capacity Mismatch Flag': pd.Series(display_mismatch)
+    })

+    st.dataframe(metrics_df.style.format({
+        'Missing Values (%)': '{:.2f}',
+        'Extreme/Nonsensical Values (%)': '{}'
+    }))
+
+    st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
+    st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: For Load, this is % of values below 0. For generation, it is negative or out-of-bound (> capacity).', unsafe_allow_html=True)
+    st.write('<b><u>Capacity Mismatch Flag</u></b>: Shows "🚩" if installed capacity is `NaN` or `0` but the dataset has non-zero generation. Blank otherwise. For Load columns, it is "-".', unsafe_allow_html=True)

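To make the extreme-value rule above concrete, a small worked example with invented numbers:

#   Suppose installed Solar capacity is 1000 MW and the column holds 8760 hourly values,
#   of which 40 are negative and 20 exceed 1000 MW:
#   nonsense.mean() * 100 = (40 + 20) / 8760 * 100 ≈ 0.68 %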
 elif section == 'Forecasts Quality':
+
     st.header('Forecast Quality')

     # Time series for last 1 week
     last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
     st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
+    variable_options, flagged_columns = filter_variable_options(last_week)
     # Dropdown to select the variable
     selected_variable = st.selectbox("Select Variable for Line PLot", list(variable_options.keys()))
     actual_col, forecast_col = variable_options[selected_variable]

+    x_vals = last_week.index.to_pydatetime().tolist()
+    y_actual = last_week[actual_col].tolist()
+    y_forecast = last_week[forecast_col].tolist()
+
+    # then plot
     fig = go.Figure()
+    fig.add_trace(go.Scatter(x=x_vals,y=y_actual,mode="lines",name="Actual"))
+    fig.add_trace(go.Scatter(x=x_vals,y=y_forecast,mode="lines",name="Forecast ENTSO-E"))
+    fig.update_layout(title=f"Forecasts vs Actual for {selected_variable}",xaxis_title="Date",yaxis_title="Value [MW]")
     st.plotly_chart(fig)

+
     # Scatter plots for error distribution
     st.subheader('Error Distribution')
     st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')

     # Get the corresponding columns for the selected variable
     actual_col, forecast_col = variable_options[selected_variable]

+    if forecast_col in data.columns:
+        # grab the two series, drop any NaNs, and align on their common timestamps
+        obs = data[actual_col].dropna()
+        pred = data[forecast_col].dropna()
+        idx = obs.index.intersection(pred.index)
+        obs = obs.loc[idx]
+        pred = pred.loc[idx]
+
+        # convert to pure Python lists
+        x_vals = obs.tolist()
+        y_vals = pred.tolist()
+
+        fig = go.Figure()
+        fig.add_trace(go.Scatter(x=x_vals,y=y_vals,mode='markers',name=f'{selected_variable}'))
+        fig.update_layout(title=f'Error Distribution for {selected_variable}',xaxis_title='Observed [MW]',yaxis_title='Forecast ENTSO-E [MW]')
+
     st.plotly_chart(fig)
+
     st.subheader('Accuracy Metrics (Sorted by rMAE):')

     date_range = st.date_input(
     else:
         st.error("Please select a valid date range.")
         st.stop()
+    output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. On the right is a radar plot with the rMAE."
     st.write(output_text)

+    data_metrics = data.loc[start_date:end_date]

+    accuracy_metrics = pd.DataFrame(columns=['MAE', 'RMSE' ,'rMAE'], index=list(variable_options.keys()))

+    for variable in variable_options.keys():
+        actual_col, forecast_col = variable_options[variable]
+        obs = data_metrics[actual_col]
+        pred = data_metrics[forecast_col]
+        error = pred - obs
+
+        mae = round(np.mean(np.abs(error)),2)
+        if 'Load' in actual_col:
+            persistence = obs.shift(168)  # Weekly persistence
+        else:
+            persistence = obs.shift(24)  # Daily persistence
+
+        # Using the whole year's data for rMAE calculations
+        rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
+        rmse = round(np.sqrt(np.mean((error)**2)), 2)
+        row_label = variable  #'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
+        accuracy_metrics.loc[row_label] = [mae, rmse, rmae]

     accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
+    accuracy_metrics.sort_values(by=accuracy_metrics.columns[-1], ascending=True, inplace=True)
     accuracy_metrics = accuracy_metrics.round(4)

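For reference, the rMAE reported here is the forecast MAE normalised by the MAE of a simple persistence baseline (a 168 h lag for load, 24 h for solar and wind); restated for a single series:

#   mae_forecast    = np.mean(np.abs(pred - obs))
#   mae_persistence = np.mean(np.abs(obs - obs.shift(lag)))    # lag = 168 (Load) or 24 (Solar / Wind)
#   rmae            = mae_forecast / mae_persistence           # values below 1 beat the persistence baseline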
+    col1, col2 = st.columns([1, 1])

     with col1:
+        # (optional) some top-margin before the table
         st.markdown(
             """
             <style>
+            .small-chart-container {
+                margin-top: 0px;
             }
             </style>
             """,
             unsafe_allow_html=True
         )
         st.dataframe(accuracy_metrics)

     with col2:
+        # prepare the data
         rmae_values = accuracy_metrics['rMAE'].tolist()
+        categories = accuracy_metrics.index.tolist()
+
+        # build the radar
+        fig = go.Figure(
+            go.Scatterpolar(
+                r=rmae_values,
+                theta=categories,
+                fill='toself',
+                name='rMAE'
+            )
+        )
+
+        # 👉 shrink the total size, and give extra left/right margin for your labels
         fig.update_layout(
+            width=300,  # make the whole plot a bit smaller
+            height=300,
+            margin=dict(
+                l=50,  # more space on the left for long category names
+                r=60,  # and on the right, if needed
+                t=20,
+                b=20
+            ),
             polar=dict(
+                angularaxis=dict(
+                    tickfont=dict(size=11)  # if you want slightly smaller ticks
+                ),
                 radialaxis=dict(
                     visible=True,
+                    range=[0, max(rmae_values)*1.2]
+                )
+            ),
             showlegend=False
         )
+
+        # wrap in a div so you can still control vertical spacing via CSS
+        st.markdown('<div class="small-chart-container">', unsafe_allow_html=True)
+        st.plotly_chart(fig, use_container_width=False)
+        st.markdown('</div>', unsafe_allow_html=True)

     st.subheader('ACF plots of Errors')
     st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')


     # Optionally calculate and store ACF values for further analysis if needed
     acf_values = acf(error.dropna(), nlags=240)
+
 elif section == 'Insights':
     st.header("Insights")


     # Resample data based on the selected resolution
     if resolution == 'Hourly':
+        resampled_data = data
     elif resolution == 'Daily':
+        resampled_data = data.resample('D').mean()  # Resample to daily mean

+    resampled_data.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in resampled_data.columns]

     # Drop missing values
+    selected_df = resampled_data.dropna()

     # Create the scatter plots using seaborn's pairplot
     sns.set_theme(style="ticks")

 elif selected_country == 'Overall':

     def calculate_net_load_error(df, country_code):
+        #filter_df = df.dropna()
+        filter_df = df.dropna(axis=1, how='all')
+        filter_df = filter_df.dropna()
+
+        if filter_df.empty:
+            # Return something (e.g., None) if there's no data left
+            print(country_code)
+            return None, None
+        net_load = filter_df['Load_entsoe'].copy()
+        for col in ['Wind_onshore_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe']:
+            if col in filter_df.columns:
+                net_load -= filter_df[col]
+
+        net_load_forecast = filter_df['Load_forecast_entsoe'].copy()
+        for col in ['Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe', 'Wind_offshore_forecast_entsoe']:
+            if col in filter_df.columns:
+                net_load_forecast -= filter_df[col]
         # Calculate the error based on the latest values
         error = (net_load_forecast - net_load).iloc[-1]
         date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format

         return error, date

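Restating what the function above computes: net load is load minus whatever renewable in-feed columns are present, and the returned error is the latest forecast-minus-actual value of that net load:

#   net_load          = Load_entsoe          - (Wind_onshore_entsoe + Solar_entsoe + Wind_offshore_entsoe, where present)
#   net_load_forecast = Load_forecast_entsoe - (corresponding forecast columns, where present)
#   error             = (net_load_forecast - net_load) at the most recent timestamp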
     def plot_net_load_error_map(data_dict):
+        # 1) compute your errors as before
+        missing_zones={'ME','IE_SEM','MK','CY','BA','AL','XK'}
+        net_load_errors = {
+            country_code: calculate_net_load_error(data, country_code)
+            for country_code, data in data_dict.items()
+        }
         df_net_load_error = pd.DataFrame({
+            "zoneName": list(net_load_errors),
+            "net_load_error": [v[0] for v in net_load_errors.values()],
+            "date": [v[1] for v in net_load_errors.values()],
         })

+        # 2) split your zones into standard vs. fallback
+        selected = list(data_dict.keys())
+        standard_zones = [z for z in selected if z not in missing_zones]
+        fallback_zones = [z for z in selected if z in missing_zones]
+
+        # 3a) load the standard ones with entsoe.load_zones
         date = pd.Timestamp.now()
+        geo_std = load_zones(standard_zones, date).reset_index()

+        # 3b) manually load the fallback ones
+        gdfs = []
+        for z in fallback_zones:
+            fn = f"{z}.geojson"
+            path = f'./geojson_missing/{fn}'
+            g = gpd.read_file(path)
+            g['zoneName'] = z
+            gdfs.append(g)

+        geo_fb = pd.concat(gdfs, ignore_index=True) if gdfs else gpd.GeoDataFrame()
+
+        # 4) combine
+        geo_data = pd.concat([geo_std, geo_fb], ignore_index=True)
         # Merge net_load_error and date into geo_data
         geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')

             geo_data,
             style_function=style_function,
             tooltip=folium.GeoJsonTooltip(
+                fields=["zoneName", "net_load_error", "date"],
                 aliases=["Country:", "Net Load Error [MW]:", "Date:"],
                 localize=True
             )

         colormap.add_to(m)

         # Display the map
+        _=st_folium(m, width=700, height=600)

def calculate_mae(actual, forecast):
|
662 |
return np.mean(np.abs(actual - forecast))
|
|
|
664 |
def calculate_persistence_mae(data, shift_hours):
|
665 |
return np.mean(np.abs(data - data.shift(shift_hours)))
|
666 |
|
667 |
+
def calculate_rmae_for_country(df, variable_options):
|
668 |
rmae = {}
|
669 |
rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
|
670 |
+
|
671 |
+
for variable in variable_options.keys():
|
672 |
+
actual_col, forecast_col = variable_options[variable]
|
673 |
+
rmae[variable] = calculate_mae(df[actual_col], df[forecast_col]) / calculate_persistence_mae(df[actual_col], 24)
|
|
|
|
|
|
|
|
|
|
|
674 |
|
675 |
+
all_opt = ["Load", "Solar", "Wind Onshore", "Wind Offshore"]
|
676 |
+
not_in_list2 = [elem for elem in all_opt if elem not in variable_options.keys()]
|
677 |
+
|
678 |
+
for ele in not_in_list2:
|
679 |
+
rmae[ele] = None
|
680 |
+
|
681 |
return rmae
|
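An illustrative return value for a zone without usable offshore-wind data (numbers invented); note that the loop also iterates over 'Load', so the 168 h persistence value assigned on the first line is recomputed with the 24 h lag, which may or may not be intended:

#   {'Load': 0.45, 'Solar': 0.30, 'Wind Onshore': 0.62, 'Wind Offshore': None}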

     def create_rmae_dataframe(data_dict):

+        rmae_values = {'Country': [], 'Load': [], 'Wind Onshore': [], 'Wind Offshore': [], 'Solar': []}

         for country_name, df in data_dict.items():
+            df_filtered = df.dropna()
+            print(country_name)
+            variable_options, flagged_columns = filter_variable_options(df_filtered)
+            rmae = calculate_rmae_for_country(df_filtered, variable_options)

             rmae_values['Country'].append(country_name)

+            for var, met in rmae.items():
+                rmae_values[var].append(met)

         return pd.DataFrame(rmae_values)

         fig = go.Figure()

         # Dynamically adjust angles to exclude Wind_offshore if all values are NaN
+        angles = ['Load']
+        if not rmae_df['Wind Offshore'].isna().all():  # Only include Wind_offshore if it's not NaN for all countries
+            angles.append('Wind Offshore')
+        if not rmae_df['Wind Onshore'].isna().all():  # Only include Wind_offshore if it's not NaN for all countries
+            angles.append('Wind Onshore')
+        if not rmae_df['Solar'].isna().all():  # Only include Wind_offshore if it's not NaN for all countries
+            angles.append('Solar')
+
         for _, row in rmae_df.iterrows():
             fig.add_trace(go.Scatterpolar(
                 r=[row[angle] for angle in angles],


         # Plot radar chart for the selected countries
         plot_rmae_radar_chart(filtered_rmae_df)