mmmapms committed on
Commit 653e93b · verified · 1 Parent(s): 330cb0a

Update app.py

Files changed (1)
  1. app.py +430 -421
app.py CHANGED
@@ -2,35 +2,120 @@ import requests
2
  import pandas as pd
3
  from io import StringIO
4
  import streamlit as st
5
- import os
6
  import plotly.express as px
7
  import plotly.graph_objects as go
8
- import plotly.colors as pc
9
  import numpy as np
10
- from sklearn.metrics import mean_squared_error
11
  from statsmodels.tsa.stattools import acf
12
  from statsmodels.graphics.tsaplots import plot_acf
13
  import matplotlib.pyplot as plt
14
- from datetime import datetime
15
  import folium
16
- import seaborn as sns
17
  from streamlit_folium import st_folium
18
- from datetime import datetime, timedelta
 
19
  from entsoe.geo import load_zones
20
- from branca.colormap import LinearColormap
21
  import branca
 
22
 
 
 
 
23
 
24
- def get_current_time():
25
- now = datetime.now()
26
- current_hour = now.hour
27
- current_minute = now.minute
28
- # Return the hour and a boolean indicating if it is after the 10th minute
29
- return current_hour, current_minute >= 10
30
 
31
- ##GET ALL FILES FROM GITHUB
32
- @st.cache_data(show_spinner=False)
33
- def load_GitHub(github_token, file_name, hour, after_10_min):
34
  url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
35
  headers = {'Authorization': f'token {github_token}'}
36
 
@@ -42,102 +127,52 @@ def load_GitHub(github_token, file_name, hour, after_10_min):
42
  if 'Date' in df.columns:
43
  df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
44
  df.set_index('Date', inplace=True) # Set 'Date' column as the index
45
- #df.to_csv(file_name)
46
- return df
 
47
  else:
48
  print(f"Failed to download {file_name}. Status code: {response.status_code}")
49
  return None
50
-
51
- @st.cache_data(show_spinner=False)
52
- def load_forecast(github_token, hour, after_10_min):
53
- predictions_dict = {}
54
- for hour in range(24):
55
- file_name = f'Predictions_{hour}h.csv'
56
- df = load_GitHub(github_token, file_name, hour, after_10_min)
57
- if df is not None:
58
- predictions_dict[file_name] = df
59
- return predictions_dict
60
-
61
- def convert_European_time(data, time_zone):
62
- data.index = pd.to_datetime(data.index, utc=True)
63
- data.index = data.index.tz_convert(time_zone)
64
- data.index = data.index.tz_localize(None)
65
- return data
66
 
67
- def simplify_model_names(df):
68
- # Define the mapping of complex names to simpler ones
69
- replacements = {
70
- r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia': '.LightGBM_with_Forecast_elia',
71
- r'\.LightGBMModel\.\dD\.TimeCov\.Temp': '.LightGBM',
72
- r'\.Naive\.\dD': '.Naive',
73
  }
74
 
75
- # Apply the replacements
76
- for original, simplified in replacements.items():
77
- df.columns = df.columns.str.replace(original, simplified, regex=True)
78
 
79
- return df
80
-
81
- def simplify_model_names_in_index(df):
82
- # Define the mapping of complex names to simpler ones
83
- replacements = {
84
- r'\.LightGBMModel\.\dD\.TimeCov\.Temp\.Forecast_elia': '.LightGBM_with_Forecast_elia',
85
- r'\.LightGBMModel\.\dD\.TimeCov\.Temp': '.LightGBM',
86
- r'\.Naive\.\dD': '.Naive',
87
- }
88
-
89
- # Apply the replacements to the DataFrame index
90
- for original, simplified in replacements.items():
91
- df.index = df.index.str.replace(original, simplified, regex=True)
92
-
93
- return df
94
 
95
  github_token = st.secrets["GitHub_Token_KUL_Margarida"]
 
96
 
97
  if github_token:
98
- hour, after_10_min=get_current_time()
99
- forecast_dict = load_forecast(github_token, hour, after_10_min)
100
-
101
- historical_forecast=load_GitHub(github_token, 'Historical_forecast.csv', hour, after_10_min)
102
-
103
- Data_BE=load_GitHub(github_token, 'BE_Elia_Entsoe_UTC.csv', hour, after_10_min)
104
- Data_FR=load_GitHub(github_token, 'FR_Entsoe_UTC.csv', hour, after_10_min)
105
- Data_NL=load_GitHub(github_token, 'NL_Entsoe_UTC.csv', hour, after_10_min)
106
- Data_DE=load_GitHub(github_token, 'DE_Entsoe_UTC.csv', hour, after_10_min)
107
- Data_PT=load_GitHub(github_token, 'PT_Entsoe_UTC.csv', hour, after_10_min)
108
- Data_ES=load_GitHub(github_token, 'ES_Entsoe_UTC.csv', hour, after_10_min)
109
- Data_AT=load_GitHub(github_token, 'AT_Entsoe_UTC.csv', hour, after_10_min)
110
- Data_IT_CALA=load_GitHub(github_token, 'IT_CALA_Entsoe_UTC.csv', hour, after_10_min)
111
- Data_IT_CNOR=load_GitHub(github_token, 'IT_CNOR_Entsoe_UTC.csv', hour, after_10_min)
112
- Data_IT_CSUD=load_GitHub(github_token, 'IT_CSUD_Entsoe_UTC.csv', hour, after_10_min)
113
- Data_IT_NORD=load_GitHub(github_token, 'IT_NORD_Entsoe_UTC.csv', hour, after_10_min)
114
- Data_IT_SICI=load_GitHub(github_token, 'IT_SICI_Entsoe_UTC.csv', hour, after_10_min)
115
- Data_IT_SUD=load_GitHub(github_token, 'IT_SUD_Entsoe_UTC.csv', hour, after_10_min)
116
- Data_DK_1=load_GitHub(github_token, 'DK_1_Entsoe_UTC.csv', hour, after_10_min)
117
- Data_DK_2=load_GitHub(github_token, 'DK_2_Entsoe_UTC.csv', hour, after_10_min)
118
-
119
- Data_BE=convert_European_time(Data_BE, 'Europe/Brussels')
120
- Data_FR=convert_European_time(Data_FR, 'Europe/Paris')
121
- Data_NL=convert_European_time(Data_NL, 'Europe/Amsterdam')
122
- Data_DE=convert_European_time(Data_DE, 'Europe/Berlin')
123
- Data_PT=convert_European_time(Data_PT, 'Europe/Lisbon')
124
- Data_ES=convert_European_time(Data_ES, 'Europe/Madrid')
125
- Data_AT=convert_European_time(Data_AT, 'Europe/Vienna')
126
- Data_IT_CALA = convert_European_time(Data_IT_CALA, 'Europe/Rome')
127
- Data_IT_CNOR = convert_European_time(Data_IT_CNOR, 'Europe/Rome')
128
- Data_IT_CSUD = convert_European_time(Data_IT_CSUD, 'Europe/Rome')
129
- Data_IT_NORD = convert_European_time(Data_IT_NORD, 'Europe/Rome')
130
- Data_IT_SICI = convert_European_time(Data_IT_SICI, 'Europe/Rome')
131
- Data_IT_SUD = convert_European_time(Data_IT_SUD, 'Europe/Rome')
132
- Data_DK_1 = convert_European_time(Data_DK_1, 'Europe/Copenhagen')
133
- Data_DK_2 = convert_European_time(Data_DK_2, 'Europe/Copenhagen')
134
-
135
 
136
  else:
137
  print("Please enter your GitHub Personal Access Token to proceed.")
138
 
139
-
140
- col1, col2 = st.columns([5, 2]) # Adjust the ratio to better fit your layout needs
141
  with col1:
142
  st.title("Transparency++")
143
 
@@ -150,85 +185,19 @@ with col2:
150
  with col2_2:
151
  st.image("energyville_logo.png", width=100)
152
 
153
-
154
- st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for Portugal, Spain, Belgium, France, Germany-Luxembourg, Austria, the Netherlands, Italy and Denmark.**")
155
-
156
- upper_space.markdown("""
157
-  
158
-  
159
- """, unsafe_allow_html=True)
160
-
161
- countries = {
162
- 'Overall': 'Overall',
163
- 'Austria': 'AT',
164
- 'Belgium': 'BE',
165
- 'Denmark 1': 'DK_1',
166
- 'Denmark 2': 'DK_2',
167
- 'France': 'FR',
168
- 'Germany-Luxembourg': 'DE_LU',
169
- 'Italy Calabria': 'IT_CALA',
170
- 'Italy Central North': 'IT_CNOR',
171
- 'Italy Central South': 'IT_CSUD',
172
- 'Italy North': 'IT_NORD',
173
- 'Italy Sicily': 'IT_SICI',
174
- 'Italy South': 'IT_SUD',
175
- 'Netherlands': 'NL',
176
- 'Portugal': 'PT',
177
- 'Spain': 'ES',
178
- }
179
-
180
- data_dict = {
181
- 'BE': Data_BE,
182
- 'FR': Data_FR,
183
- 'DE_LU': Data_DE,
184
- 'NL': Data_NL,
185
- 'PT': Data_PT,
186
- 'AT': Data_AT,
187
- 'ES': Data_ES,
188
- 'IT_CALA': Data_IT_CALA,
189
- 'IT_CNOR': Data_IT_CNOR,
190
- 'IT_CSUD': Data_IT_CSUD,
191
- 'IT_NORD': Data_IT_NORD,
192
- 'IT_SICI': Data_IT_SICI,
193
- 'IT_SUD': Data_IT_SUD,
194
- 'DK_1': Data_DK_1,
195
- 'DK_2': Data_DK_2,
196
- }
197
-
198
- countries_all_RES = ['BE', 'FR', 'NL', 'DE_LU', 'PT', 'DK_1', 'DK_2']
199
- countries_no_offshore= ['AT', 'ES', 'IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_NORD', 'IT_SICI', 'IT_SUD',]
200
-
201
- installed_capacities = {
202
- 'FR': { 'Solar': 17419, 'Wind Offshore': 1483, 'Wind Onshore': 22134},
203
- 'DE_LU': { 'Solar': 73821, 'Wind Offshore': 8386, 'Wind Onshore': 59915},
204
- 'BE': { 'Solar': 8789, 'Wind Offshore': 2262, 'Wind Onshore': 3053},
205
- 'NL': { 'Solar': 22590, 'Wind Offshore': 3220, 'Wind Onshore': 6190},
206
- 'PT': { 'Solar': 1811, 'Wind Offshore': 25, 'Wind Onshore': 5333},
207
- 'ES': { 'Solar': 23867, 'Wind Onshore': 30159},
208
- 'AT': { 'Solar': 7294, 'Wind Onshore': 4021 },
209
- 'DK_1': { 'Solar': 2738, 'Wind Offshore': 1601, 'Wind Onshore': 4112},
210
- 'DK_2': { 'Solar': 992, 'Wind Offshore': 1045, 'Wind Onshore': 748},
211
- }
212
-
213
- forecast_columns_all_RES = [
214
- 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Wind_offshore_entsoe','Wind_offshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
215
-
216
- forecast_columns_no_wind_offshore = [
217
- 'Load_entsoe','Load_forecast_entsoe','Wind_onshore_entsoe','Wind_onshore_forecast_entsoe','Solar_entsoe','Solar_forecast_entsoe']
218
-
219
 
220
  st.sidebar.header('Filters')
221
 
222
  st.sidebar.subheader("Select Country")
223
  st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
 
 
224
 
225
- selected_country = st.sidebar.selectbox('Select Country', list(countries.keys()))
226
-
227
- # Sidebar with radio buttons for different sections
228
  if selected_country != 'Overall':
229
  st.sidebar.subheader("Section")
230
  st.sidebar.caption("Select the type of information you want to explore.")
231
- section = st.sidebar.radio('Section', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1, label_visibility='collapsed')
232
  else:
233
  section = None # No section is shown when "Overall" is selected
234
 
@@ -236,124 +205,172 @@ if selected_country == 'Overall':
236
  data = None # You can set data to None or a specific dataset based on your logic
237
  section = None # No section selected when "Overall" is chosen
238
  else:
239
- country_code = countries[selected_country]
240
- data = data_dict.get(country_code)
241
- if country_code in countries_all_RES:
242
- forecast_columns = forecast_columns_all_RES
243
- elif country_code in countries_no_offshore:
244
- forecast_columns = forecast_columns_no_wind_offshore
245
- if country_code == 'BE':
246
- weather_columns = ['Temperature', 'Wind Speed Onshore', 'Wind Speed Offshore']
247
- data['Temperature'] = data['temperature_2m_8']
248
- data['Wind Speed Onshore'] = data['wind_speed_100m_8']
249
- data['Wind Speed Offshore'] = data['wind_speed_100m_4']
250
- else:
251
- weather_columns = ['Temperature', 'Wind Speed']
252
- data['Temperature'] = data['temperature_2m']
253
- data['Wind Speed'] = data['wind_speed_100m']
254
-
255
 
256
  if section == 'Data Quality':
257
-
258
  st.header('Data Quality')
259
-
260
- st.write('The table below presents the data quality metrics focusing on the percentage of missing values and the occurrence of extreme or nonsensical values for the selected country.')
261
 
262
- yesterday_midnight = pd.Timestamp(datetime.now().date() - pd.Timedelta(days=1)).replace(hour=23, minute=59, second=59)
 
 
263
 
264
- # Filter data until the end of yesterday (midnight)
265
- data_quality = data[data.index <= yesterday_midnight]
266
 
267
- # Report % of missing values
268
- missing_values = data_quality[forecast_columns].isna().mean() * 100
269
  missing_values = missing_values.round(2)
270
 
271
- if country_code not in installed_capacities:
272
- st.markdown(f"⚠️ **Installed capacities not available on ENTSO-E Transparency Platform for country code '{country_code}'. Therefore, cannot calculate Extreme/Nonsensical values.**")
273
- # If capacities are not available, assign NaN to extreme_values and skip extreme value checking
274
- extreme_values = {col: np.nan for col in forecast_columns}
275
- else:
276
- capacities = installed_capacities[country_code]
277
- extreme_values = {}
278
-
279
- for col in forecast_columns:
280
- if 'Solar_entsoe' in col:
281
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
282
- elif 'Solar_forecast_entsoe' in col:
283
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Solar'])).mean() * 100
284
- elif 'Wind_onshore_entsoe' in col:
285
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
286
- elif 'Wind_onshore_forecast_entsoe' in col:
287
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Onshore'])).mean() * 100
288
- elif 'Wind_offshore_entsoe' in col:
289
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
290
- elif 'Wind_offshore_forecast_entsoe' in col:
291
- extreme_values[col] = ((data_quality[col] < 0) | (data_quality[col] > capacities['Wind Offshore'])).mean() * 100
292
- elif 'Load_entsoe' in col:
293
- extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
294
- elif 'Load_forecast_entsoe' in col:
295
- extreme_values[col] = ((data_quality[col] < 0)).mean() * 100
296
-
297
- extreme_values = pd.Series(extreme_values).round(2)
298
- # Combine all metrics into one DataFrame
299
- metrics_df = pd.DataFrame({
300
- 'Missing Values (%)': missing_values,
301
- 'Extreme/Nonsensical Values (%)': extreme_values,
302
- })
303
 
304
- st.markdown(
305
- """
306
- <style>
307
- .dataframe {font-size: 45px !important;}
308
- </style>
309
- """,
310
- unsafe_allow_html=True
311
- )
 
 
 
312
 
313
- st.dataframe(metrics_df)
314
 
315
- st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset', unsafe_allow_html=True)
316
- st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: Values that are considered implausible such as negative or out-of-bound values i.e., (generation<0) or (generation>capacity)', unsafe_allow_html=True)
317
 
318
  elif section == 'Forecasts Quality':
319
-
320
  st.header('Forecast Quality')
321
 
322
  # Time series for last 1 week
323
  last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
324
  st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
325
-
326
- # Options for selecting the data to display
327
- if country_code in countries_all_RES:
328
- variable_options = {
329
- "Load": ("Load_entsoe", "Load_forecast_entsoe"),
330
- "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
331
- "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
332
- "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe")
333
- }
334
- elif country_code in countries_no_offshore:
335
- variable_options = {
336
- "Load": ("Load_entsoe", "Load_forecast_entsoe"),
337
- "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
338
- "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
339
- }
340
- else:
341
- print('Country code doesnt correspond.')
342
-
343
  # Dropdown to select the variable
344
  selected_variable = st.selectbox("Select Variable for Line Plot", list(variable_options.keys()))
345
-
346
- # Get the corresponding columns for the selected variable
347
  actual_col, forecast_col = variable_options[selected_variable]
348
 
349
- # Plot only the selected variable's data
350
  fig = go.Figure()
351
- fig.add_trace(go.Scatter(x=last_week.index, y=last_week[actual_col], mode='lines', name='Actual'))
352
- fig.add_trace(go.Scatter(x=last_week.index, y=last_week[forecast_col], mode='lines', name='Forecast ENTSO-E'))
353
- fig.update_layout(title=f'Forecasts vs Actual for {selected_variable}', xaxis_title='Date', yaxis_title='Value [MW]')
354
-
355
  st.plotly_chart(fig)
356
 
 
357
  # Scatter plots for error distribution
358
  st.subheader('Error Distribution')
359
  st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
@@ -362,19 +379,24 @@ elif section == 'Forecasts Quality':
362
  # Get the corresponding columns for the selected variable
363
  actual_col, forecast_col = variable_options[selected_variable]
364
 
365
- # Filter data for the selected year and check if columns are available
366
- data_2024 = data[data.index.year > 2023]
367
- if forecast_col in data_2024.columns:
368
- obs = data_2024[actual_col]
369
- pred = data_2024[forecast_col]
370
-
371
- # Calculate error and plot
372
- error = pred - obs
373
- fig = px.scatter(x=obs, y=pred, labels={'x': 'Observed [MW]', 'y': 'Forecast ENTSO-E [MW]'})
374
- fig.update_layout(title=f'Error Distribution for {selected_variable}')
375
-
376
  st.plotly_chart(fig)
377
-
378
  st.subheader('Accuracy Metrics (Sorted by rMAE):')
379
 
380
  date_range = st.date_input(
@@ -388,99 +410,92 @@ elif section == 'Forecasts Quality':
388
  else:
389
  st.error("Please select a valid date range.")
390
  st.stop()
391
-
392
-
393
- output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. "
394
  st.write(output_text)
395
 
396
- data = data.loc[start_date:end_date]
397
-
398
- if country_code in countries_all_RES:
399
- accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore', 'Wind Offshore'])
400
- elif country_code in countries_no_offshore:
401
- accuracy_metrics = pd.DataFrame(columns=['MAE', 'rMAE'], index=['Load', 'Solar', 'Wind Onshore'])
402
- else:
403
- print('Country code doesnt correspond.')
404
 
 
405
 
406
- for i in range(0, len(forecast_columns), 2):
407
- actual_col = forecast_columns[i]
408
- forecast_col = forecast_columns[i + 1]
409
- if forecast_col in data.columns:
410
- obs = data[actual_col]
411
- pred = data[forecast_col]
412
- error = pred - obs
413
-
414
- mae = round(np.mean(np.abs(error)),2)
415
- if 'Load' in actual_col:
416
- persistence = obs.shift(168) # Weekly persistence
417
- else:
418
- persistence = obs.shift(24) # Daily persistence
419
-
420
- # Using the whole year's data for rMAE calculations
421
- rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
422
-
423
- row_label = 'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
424
- accuracy_metrics.loc[row_label] = [mae, rmae]
425
 
426
  accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
427
- accuracy_metrics.sort_values(by=accuracy_metrics.columns[1], ascending=True, inplace=True)
428
  accuracy_metrics = accuracy_metrics.round(4)
429
 
430
- col1, col2 = st.columns([1, 2])
431
 
432
  with col1:
 
433
  st.markdown(
434
  """
435
  <style>
436
- .small-chart {
437
- margin-top: 30px; /* Adjust this value as needed */
438
  }
439
  </style>
440
  """,
441
  unsafe_allow_html=True
442
  )
443
  st.dataframe(accuracy_metrics)
444
- st.markdown(
445
- """
446
- <style>
447
- .small-chart {
448
- margin-top: -30px; /* Adjust this value as needed */
449
- }
450
- </style>
451
- """,
452
- unsafe_allow_html=True
453
- )
454
 
455
  with col2:
456
-
457
- # Prepare data for the radar chart
458
  rmae_values = accuracy_metrics['rMAE'].tolist()
459
- categories = accuracy_metrics.index.tolist()
460
-
461
- fig = go.Figure()
462
- fig.add_trace(go.Scatterpolar(
463
- r=rmae_values,
464
- theta=categories,
465
- fill='toself',
466
- name='rMAE'
467
- ))
468
-
469
- # Configuring radar chart layout to be smaller
 
 
470
  fig.update_layout(
471
- width=250, # Adjust width
472
- height=250, # Adjust height
473
- margin=dict(t=20, b=20, l=0, r=0), # Remove all margins
474
  polar=dict(
 
 
 
475
  radialaxis=dict(
476
  visible=True,
477
- range=[0, max(rmae_values) * 1.2] # Adjust range dynamically
478
- )),
 
479
  showlegend=False
480
  )
481
-
482
- # Apply CSS class to remove extra space above chart
483
- st.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False}, className="small-chart")
 
 
484
 
485
  st.subheader('ACF plots of Errors')
486
  st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
@@ -504,7 +519,7 @@ elif section == 'Forecasts Quality':
504
 
505
  # Optionally calculate and store ACF values for further analysis if needed
506
  acf_values = acf(error.dropna(), nlags=240)
507
-
508
  elif section == 'Insights':
509
  st.header("Insights")
510
 
@@ -516,23 +531,15 @@ elif section == 'Insights':
516
 
517
  # Resample data based on the selected resolution
518
  if resolution == 'Hourly':
519
- resampled_data = data_2024
520
  elif resolution == 'Daily':
521
- resampled_data = data_2024.resample('D').mean() # Resample to daily mean
522
 
523
- # Select the necessary columns for the scatter plot
524
- if country_code in countries_all_RES:
525
- selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe', 'Wind_onshore_entsoe'] + weather_columns
526
- elif country_code in countries_no_offshore:
527
- selected_columns = ['Load_entsoe', 'Solar_entsoe', 'Wind_onshore_entsoe'] + weather_columns
528
- else:
529
- print('Country code doesnt correspond.')
530
 
531
- selected_df = resampled_data[selected_columns]
532
- selected_df.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in selected_df.columns]
533
 
534
  # Drop missing values
535
- selected_df = selected_df.dropna()
536
 
537
  # Create the scatter plots using seaborn's pairplot
538
  sns.set_theme(style="ticks")
@@ -543,30 +550,24 @@ elif section == 'Insights':
543
 
544
  elif selected_country == 'Overall':
545
 
546
- def get_forecast_columns(country_code):
547
- if country_code in countries_all_RES:
548
- return forecast_columns_all_RES
549
- elif country_code in countries_no_offshore:
550
- return forecast_columns_no_wind_offshore
551
- else:
552
- print('Country code doesnt correspond.')
553
-
554
  def calculate_net_load_error(df, country_code):
555
- forecast_columns = get_forecast_columns(country_code)
556
- filter_df = df[forecast_columns].dropna()
557
-
558
- # Initialize net_load and net_load_forecast with Load and other available data
559
- net_load = filter_df['Load_entsoe'] - filter_df['Wind_onshore_entsoe'] - filter_df['Solar_entsoe']
560
- net_load_forecast = filter_df['Load_forecast_entsoe'] - filter_df['Wind_onshore_forecast_entsoe'] - filter_df['Solar_forecast_entsoe']
561
-
562
- # Subtract Wind_offshore_entsoe if the column exists
563
- if 'Wind_offshore_entsoe' in filter_df.columns:
564
- net_load -= filter_df['Wind_offshore_entsoe']
565
-
566
- # Subtract Wind_offshore_forecast_entsoe if the column exists
567
- if 'Wind_offshore_forecast_entsoe' in filter_df.columns:
568
- net_load_forecast -= filter_df['Wind_offshore_forecast_entsoe']
569
-
 
 
570
  # Calculate the error based on the latest values
571
  error = (net_load_forecast - net_load).iloc[-1]
572
  date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format
@@ -574,29 +575,41 @@ elif selected_country == 'Overall':
574
  return error, date
575
 
576
  def plot_net_load_error_map(data_dict):
577
- # Calculate net load errors and dates for each country
578
- net_load_errors = {country_code: calculate_net_load_error(data, country_code) for country_code, data in data_dict.items()}
579
-
580
- # Use country codes directly
581
- selected_country_codes = list(data_dict.keys())
582
-
583
  df_net_load_error = pd.DataFrame({
584
- 'zoneName': selected_country_codes,
585
- 'net_load_error': [v[0] for v in net_load_errors.values()],
586
- 'date': [v[1] for v in net_load_errors.values()]
587
  })
588
 
589
- # Load the GeoJSON data using the entsoe library
590
  date = pd.Timestamp.now()
591
- geo_data = load_zones(selected_country_codes, date)
592
 
593
- # Reset index to include 'zoneName' as a column
594
- geo_data = geo_data.reset_index()
 
596
- # Map country codes to country names
597
- countries_code_to_name = {v: k for k, v in countries.items()}
598
- geo_data['name'] = geo_data['zoneName'].map(countries_code_to_name)
599
 
  # Merge net_load_error and date into geo_data
601
  geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
602
 
@@ -633,7 +646,7 @@ elif selected_country == 'Overall':
633
  geo_data,
634
  style_function=style_function,
635
  tooltip=folium.GeoJsonTooltip(
636
- fields=["name", "net_load_error", "date"],
637
  aliases=["Country:", "Net Load Error [MW]:", "Date:"],
638
  localize=True
639
  )
@@ -643,7 +656,7 @@ elif selected_country == 'Overall':
643
  colormap.add_to(m)
644
 
645
  # Display the map
646
- _ = st_folium(m, width=700, height=600)
647
 
648
  def calculate_mae(actual, forecast):
649
  return np.mean(np.abs(actual - forecast))
@@ -651,40 +664,36 @@ elif selected_country == 'Overall':
651
  def calculate_persistence_mae(data, shift_hours):
652
  return np.mean(np.abs(data - data.shift(shift_hours)))
653
 
654
- def calculate_rmae_for_country(df):
655
  rmae = {}
656
  rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
657
- rmae['Wind_onshore'] = calculate_mae(df['Wind_onshore_entsoe'], df['Wind_onshore_forecast_entsoe']) / calculate_persistence_mae(df['Wind_onshore_entsoe'], 24)
658
-
659
- # Only calculate Wind_offshore rMAE if the columns exist
660
- if 'Wind_offshore_entsoe' in df.columns and 'Wind_offshore_forecast_entsoe' in df.columns:
661
- rmae['Wind_offshore'] = calculate_mae(df['Wind_offshore_entsoe'], df['Wind_offshore_forecast_entsoe']) / calculate_persistence_mae(df['Wind_offshore_entsoe'], 24)
662
- else:
663
- rmae['Wind_offshore'] = None # Mark as None if not applicable
664
-
665
- rmae['Solar'] = calculate_mae(df['Solar_entsoe'], df['Solar_forecast_entsoe']) / calculate_persistence_mae(df['Solar_entsoe'], 24)
666
667
  return rmae
668
 
669
  def create_rmae_dataframe(data_dict):
670
 
671
- rmae_values = {'Country': [], 'Load': [], 'Wind_onshore': [], 'Wind_offshore': [], 'Solar': []}
672
 
673
  for country_name, df in data_dict.items():
674
- forecast_columns=get_forecast_columns(country_name)
675
- df_filtered = df[forecast_columns].dropna()
676
- rmae = calculate_rmae_for_country(df_filtered)
 
677
 
678
  rmae_values['Country'].append(country_name)
679
- rmae_values['Load'].append(rmae['Load'])
680
- rmae_values['Wind_onshore'].append(rmae['Wind_onshore'])
681
- rmae_values['Solar'].append(rmae['Solar'])
682
 
683
- # Append Wind_offshore rMAE only if it's not None (i.e., the country has offshore wind data)
684
- if rmae['Wind_offshore'] is not None:
685
- rmae_values['Wind_offshore'].append(rmae['Wind_offshore'])
686
- else:
687
- rmae_values['Wind_offshore'].append(np.nan) # Insert NaN for countries without offshore wind
688
 
689
  return pd.DataFrame(rmae_values)
690
 
@@ -692,10 +701,14 @@ elif selected_country == 'Overall':
692
  fig = go.Figure()
693
 
694
  # Dynamically adjust angles to exclude Wind_offshore if all values are NaN
695
- angles = ['Load', 'Wind_onshore', 'Solar']
696
- if not rmae_df['Wind_offshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
697
- angles.append('Wind_offshore')
698
-
 
 
 
 
699
  for _, row in rmae_df.iterrows():
700
  fig.add_trace(go.Scatterpolar(
701
  r=[row[angle] for angle in angles],
@@ -735,7 +748,3 @@ elif selected_country == 'Overall':
735
 
736
  # Plot radar chart for the selected countries
737
  plot_rmae_radar_chart(filtered_rmae_df)
738
-
739
-
740
-
741
-
 
2
  import pandas as pd
3
  from io import StringIO
4
  import streamlit as st
 
5
  import plotly.express as px
6
  import plotly.graph_objects as go
 
7
  import numpy as np
 
8
  from statsmodels.tsa.stattools import acf
9
  from statsmodels.graphics.tsaplots import plot_acf
10
  import matplotlib.pyplot as plt
 
11
  import folium
 
12
  from streamlit_folium import st_folium
13
+ import seaborn as sns
14
+ import datetime
15
  from entsoe.geo import load_zones
 
16
  import branca
17
+ import pytz
18
+ import time
19
+ from entsoe import EntsoePandasClient
20
+ import geopandas as gpd
21
+
22
+
23
+ tz = pytz.timezone('Europe/Brussels')
24
+
25
+ def load_capacity_csv(path: str) -> dict:
26
+ """Load installed capacities CSV into a dict: Country -> {tech: value} """
27
+ df = pd.read_csv(path, index_col='Country')
28
+ # Ensure numeric and handle missing
29
+ df = df.replace({"NaN": np.nan}).astype(float)
30
+ return df.to_dict(orient='index')
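# Illustrative sketch, not part of this commit: the CSV layout that load_capacity_csv
# expects (a 'Country' index column plus one column per technology), as inferred from
# the read_csv call above. File contents and numbers here are made up.
import io
_sample_csv = io.StringIO(
    "Country,Solar,Wind Offshore,Wind Onshore\n"
    "BE,8789,2262,3053\n"
    "AT,7294,,4021\n"
)
_caps = pd.read_csv(_sample_csv, index_col='Country').astype(float).to_dict(orient='index')
# _caps['BE']['Solar'] -> 8789.0 ; _caps['AT']['Wind Offshore'] -> nan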
31
+
32
+ # Load installed capacities from CSV files
33
+ installed_capacities_2024 = load_capacity_csv('installed_capacities_2024.csv')
34
+ installed_capacities_2025 = load_capacity_csv('installed_capacities_2025.csv')
35
+
36
+ TECHS = ['Solar', 'Wind Offshore', 'Wind Onshore']
37
+ #countries = [ 'AT', 'BE', 'NL', 'BG', 'HR', 'CZ', 'DE_LU', 'DK_1', 'DK_2',
38
+ #'EE', 'FI', 'FR', 'GR', 'HU', 'IT_CALA', 'IT_CNOR',
39
+ #'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD', 'LV', 'LT',
40
+ #'NO_1', 'NO_2', 'NO_3', 'NO_4', 'NO_5', 'PL', 'PT', 'RO',
41
+ #'SE_1', 'SE_2', 'SE_3', 'SE_4', 'RS', 'SK', 'SI', 'ES', 'CH', 'ME','IE_SEM','MK','CY','BA','AL','XK']
42
+
43
+ countries = ['AT', 'BE', 'DE_LU', 'DK_1', 'DK_2', 'FR', 'IT_CALA', 'IT_CNOR',
44
+ 'IT_CSUD', 'IT_NORD', 'IT_SARD', 'IT_SICI', 'IT_SUD',
45
+ 'NL', 'PT', 'ES']
46
+
47
+ def get_time_zone(country_code):
48
+
49
+ tz_map = {
50
+ 'AL': 'Europe/Tirane',
51
+ 'AT': 'Europe/Vienna',
52
+ 'BE': 'Europe/Brussels',
53
+ 'BA': 'Europe/Sarajevo',
54
+ 'BG': 'Europe/Sofia',
55
+ 'HR': 'Europe/Zagreb',
56
+ 'CY': 'Asia/Nicosia',
57
+ 'CZ': 'Europe/Prague',
58
+ 'DE_LU': 'Europe/Berlin',
59
+ 'DK_1': 'Europe/Copenhagen',
60
+ 'DK_2': 'Europe/Copenhagen',
61
+ 'EE': 'Europe/Tallinn',
62
+ 'FI': 'Europe/Helsinki',
63
+ 'MK': 'Europe/Skopje',
64
+ 'FR': 'Europe/Paris',
65
+ 'GR': 'Europe/Athens',
66
+ 'HU': 'Europe/Budapest',
67
+ 'IS': 'Atlantic/Reykjavik',
68
+ 'IE_SEM': 'Europe/Dublin',
69
+ 'IT_CALA': 'Europe/Rome',
70
+ 'IT_CNOR': 'Europe/Rome',
71
+ 'IT_CSUD': 'Europe/Rome',
72
+ 'IT_NORD': 'Europe/Rome',
73
+ 'IT_SARD': 'Europe/Rome',
74
+ 'IT_SICI': 'Europe/Rome',
75
+ 'IT_SUD': 'Europe/Rome',
76
+ 'LV': 'Europe/Riga',
77
+ 'LT': 'Europe/Vilnius',
78
+ 'ME': 'Europe/Podgorica',
79
+ 'NL': 'Europe/Amsterdam',
80
+ 'NO_1': 'Europe/Oslo',
81
+ 'NO_2': 'Europe/Oslo',
82
+ 'NO_3': 'Europe/Oslo',
83
+ 'NO_4': 'Europe/Oslo',
84
+ 'NO_5': 'Europe/Oslo',
85
+ 'PL': 'Europe/Warsaw',
86
+ 'PT': 'Europe/Lisbon',
87
+ 'MD': 'Europe/Chisinau',
88
+ 'RO': 'Europe/Bucharest',
89
+ 'SE_1': 'Europe/Stockholm',
90
+ 'SE_2': 'Europe/Stockholm',
91
+ 'SE_3': 'Europe/Stockholm',
92
+ 'SE_4': 'Europe/Stockholm',
93
+ 'RS': 'Europe/Belgrade',
94
+ 'SK': 'Europe/Bratislava',
95
+ 'SI': 'Europe/Ljubljana',
96
+ 'ES': 'Europe/Madrid',
97
+ 'CH': 'Europe/Zurich',
98
+ 'XK': 'Europe/Rome'
99
+ }
100
+ if country_code in tz_map:
101
+ return tz_map[country_code]
102
+ else:
103
+ raise ValueError(f"Time zone for country code {country_code} is not defined.")
104
+
105
+ def convert_European_time(data, bdz):
106
+ time_zone = get_time_zone(bdz)
107
+ data.index = pd.to_datetime(data.index, utc=True)
108
+ data.index = data.index.tz_convert(time_zone)
109
+ data.index = data.index.tz_localize(None)
110
+ return data
111
 
112
+ def filter_dataframe(df):
113
+ allowed_columns = {"Load_entsoe", "Load_forecast_entsoe", "Solar_entsoe", "Solar_forecast_entsoe", "Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe", "Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"}
114
+ return df[[col for col in df.columns if col in allowed_columns]]
115
 
116
+ def load_GitHub(github_token, bdz):
117
 
118
+ file_name=f'{bdz}_Entsoe_UTC.csv'
 
 
119
  url = f'https://raw.githubusercontent.com/margaridamascarenhas/Transparency_Data/main/{file_name}'
120
  headers = {'Authorization': f'token {github_token}'}
121
 
 
127
  if 'Date' in df.columns:
128
  df['Date'] = pd.to_datetime(df['Date']) # Convert 'Date' column to datetime
129
  df.set_index('Date', inplace=True) # Set 'Date' column as the index
130
+ df=filter_dataframe(df)
131
+ df=convert_European_time(df, bdz)
132
+ return df[df.index >= pd.Timestamp('2024-01-01')]
133
  else:
134
  print(f"Failed to download {file_name}. Status code: {response.status_code}")
135
  return None
136
 
137
+ def filter_variable_options(df):
138
+ all_options = {
139
+ "Load": ("Load_entsoe", "Load_forecast_entsoe"),
140
+ "Solar": ("Solar_entsoe", "Solar_forecast_entsoe"),
141
+ "Wind Onshore": ("Wind_onshore_entsoe", "Wind_onshore_forecast_entsoe"),
142
+ "Wind Offshore": ("Wind_offshore_entsoe", "Wind_offshore_forecast_entsoe"),
143
  }
144
 
145
+ variable_options = {}
146
+ flagged_columns = []
 
147
 
148
+ for key, (col1, col2) in all_options.items():
149
+ col1_exists = col1 in df.columns and not df[col1].isna().all()
150
+ col2_exists = col2 in df.columns and not df[col2].isna().all()
151
+ if col1_exists and col2_exists:
152
+ variable_options[key] = (col1, col2)
153
+ elif not col1_exists and col2_exists:
154
+ flagged_columns.append(col1)
155
+ elif col1_exists and not col2_exists:
156
+ flagged_columns.append(col2)
157
+ elif not col1_exists and not col2_exists:
158
+ flagged_columns.append(col1)
159
+ flagged_columns.append(col2)
160
+ return variable_options, flagged_columns
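# Illustrative sketch, not part of this commit: how filter_variable_options behaves on
# a toy frame, assuming the ENTSO-E column names used above. Pairs where both the
# observed and the forecast column exist and are not all-NaN become selectable options;
# everything else is flagged.
_toy = pd.DataFrame({
    'Load_entsoe': [100.0, 110.0],
    'Load_forecast_entsoe': [98.0, 112.0],
    'Wind_offshore_entsoe': [np.nan, np.nan],   # present but all-NaN, so it is flagged
})
_options, _flagged = filter_variable_options(_toy)
# _options -> {'Load': ('Load_entsoe', 'Load_forecast_entsoe')}
# _flagged -> the solar and wind columns (absent or all-NaN)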
 
 
161
 
162
  github_token = st.secrets["GitHub_Token_KUL_Margarida"]
163
+ #countries = ['IT_CALA', 'IT_CNOR', 'IT_CSUD', 'IT_SARD', 'PT', 'FR']
164
 
165
  if github_token:
166
+ data_dict = {}
167
+ for bdz in countries:
168
+ df = load_GitHub(github_token, bdz)
169
+ if df is not None:
170
+ data_dict[bdz] = df
171
 
172
  else:
173
  print("Please enter your GitHub Personal Access Token to proceed.")
174
 
175
+ col1, col2 = st.columns([5, 2])
 
176
  with col1:
177
  st.title("Transparency++")
178
 
 
185
  with col2_2:
186
  st.image("energyville_logo.png", width=100)
187
 
188
+ st.write("**Evaluate and analyze ENTSO-E Transparency Platform data quality, forecast accuracy, and energy trends for ENTSO-E member countries.**")
189
 
190
  st.sidebar.header('Filters')
191
 
192
  st.sidebar.subheader("Select Country")
193
  st.sidebar.caption("Choose the country for which you want to display data or forecasts.")
194
+ selection = ['Overall'] + list(countries)
195
+ selected_country = st.sidebar.selectbox('Select Country', selection)
196
197
  if selected_country != 'Overall':
198
  st.sidebar.subheader("Section")
199
  st.sidebar.caption("Select the type of information you want to explore.")
200
+ section = st.sidebar.radio('', ['Data Quality', 'Forecasts Quality', 'Insights'], index=1)
201
  else:
202
  section = None # No section is shown when "Overall" is selected
203
 
 
205
  data = None # You can set data to None or a specific dataset based on your logic
206
  section = None # No section selected when "Overall" is chosen
207
  else:
208
+ country_code = selected_country
209
+ data = data_dict.get(selected_country)
210
 
211
  if section == 'Data Quality':
 
212
  st.header('Data Quality')
 
 
213
 
214
+ # Determine if capacities missing per year
215
+ caps4 = installed_capacities_2024.get(country_code)
216
+ caps5 = installed_capacities_2025.get(country_code)
217
 
218
+ st.write(
219
+ "The table below presents the data quality metrics focusing on the percentage "
220
+ "of missing values and the occurrence of extreme or nonsensical values for "
221
+ "the selected country. Additionally, it flags any mismatch between installed "
222
+ "capacity (NaN or 0) and actual data in the dataset."
223
+ )
224
+
225
+ # Determine end of data slice (yesterday 23:59:59)
226
+ yesterday = datetime.datetime.now(tz).date() - datetime.timedelta(days=1)
227
+ end_time = pd.Timestamp(yesterday).replace(hour=23, minute=59, second=59)
228
+ # Filter data
229
+ data_quality = data[data.index <= end_time]
230
+
231
+ tech_cols = {
232
+ 'Load': ('Load_entsoe', 'Load_forecast_entsoe'),
233
+ 'Wind Onshore': ('Wind_onshore_entsoe', 'Wind_onshore_forecast_entsoe'),
234
+ 'Wind Offshore': ('Wind_offshore_entsoe', 'Wind_offshore_forecast_entsoe'),
235
+ 'Solar': ('Solar_entsoe', 'Solar_forecast_entsoe'),
236
+ }
237
 
238
+ skip_cols = []
239
+
240
+ for tech_key, (act_col, fct_col) in tech_cols.items():
241
+ # only proceed if the columns are in the DataFrame
242
+ if act_col in data_quality.columns and fct_col in data_quality.columns:
243
+ # get installed capacities for 2024 & 2025
244
+ cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
245
+ cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
246
+
247
+ # if both years are missing or zero capacity
248
+ if (pd.isna(cap4) or cap4 == 0) and (pd.isna(cap5) or cap5 == 0):
249
+ act = data_quality[act_col]
250
+ fct = data_quality[fct_col]
251
+ # check if actual AND forecast are entirely zero or NaN
252
+ only_zero_or_na = (act.fillna(0) == 0).all() and (fct.fillna(0) == 0).all()
253
+ if only_zero_or_na:
254
+ skip_cols += [act_col, fct_col]
255
+
256
+ # drop any columns flagged for skipping (ignore errors if somehow missing)
257
+ if skip_cols:
258
+ data_quality = data_quality.drop(columns=skip_cols, errors='ignore')
259
+
260
+ # Compute missing
261
+ missing_values = data_quality.isna().mean() * 100
262
  missing_values = missing_values.round(2)
263
 
264
+ extreme_values = {}
265
+ capacity_mismatch = {}
266
+ neg_counts = {}
267
+ over_counts = {}
268
+ cutoff = pd.Timestamp('2025-01-01')
269
+
270
+ # Iterate over columns
271
+ for col in data_quality.columns:
272
+ # Identify technology
273
+ if 'Solar' in col:
274
+ tech_key = 'Solar'
275
+ elif 'Wind_onshore' in col:
276
+ tech_key = 'Wind Onshore'
277
+ elif 'Wind_offshore' in col:
278
+ tech_key = 'Wind Offshore'
279
+ elif 'Load' in col:
280
+ tech_key = 'Load'
281
+ else:
282
+ extreme_values[col] = np.nan
283
+ capacity_mismatch[col] = np.nan
284
+ continue
285
+
286
+ series = data_quality[col]
287
+ # Year masks
288
+ mask_2024 = series.index < cutoff
289
+ # Fetch capacity values
290
+ cap4 = caps4.get(tech_key, np.nan) if isinstance(caps4, dict) else np.nan
291
+ cap5 = caps5.get(tech_key, np.nan) if isinstance(caps5, dict) else np.nan
292
+ print('var:',col)
293
+ print('cap4:',cap4)
294
+ if tech_key == 'Load':
295
+ # Negative load
296
+ extreme_pct = round((series < 0).mean() * 100, 2)
297
+ mismatch = np.nan
298
+ else:
299
+ # Create per-timestamp capacity
300
+ cap_series = pd.Series(
301
+ np.where(mask_2024, cap4, cap5),
302
+ index=series.index
303
+ )
304
+ # Flags
305
+ neg = series < 0
306
+ over = (series > cap_series) & cap_series.notna()
307
+ nonsense = neg | over
308
+ extreme_pct = round(nonsense.mean() * 100, 2)
309
+ # Mismatch: non-zero gen when cap missing or zero
310
+ # cap4, cap5 are floats or NaN
311
+ no_cap_2024 = pd.isna(cap4) or (cap4 == 0)
312
+ no_cap_2025 = pd.isna(cap5) or (cap5 == 0)
313
+
314
+ # check if there's at least one actual non-zero (treat NaN as 0)
315
+ has_nonzero = (series.fillna(0) != 0).any()
316
+
317
+ if no_cap_2024 and no_cap_2025 and has_nonzero:
318
+ mismatch = 100.0
319
+ else:
320
+ mismatch = 0.0
321
 
322
+ extreme_values[col] = extreme_pct
323
+ capacity_mismatch[col] = mismatch
324
+
325
+ display_extreme = {col: f"{val:.2f}" if not pd.isna(val) else ''
326
+ for col, val in extreme_values.items()}
327
+ display_mismatch = {}
328
+ for col, val in capacity_mismatch.items():
329
+ if 'Load' in col:
330
+ display_mismatch[col] = '-'
331
+ else:
332
+ display_mismatch[col] = '🚩' if val == 100.0 else ''
333
 
334
+ # Build and render DataFrame
335
+ metrics_df = pd.DataFrame({
336
+ 'Missing Values (%)': missing_values,
337
+ 'Extreme/Nonsensical Values (%)': pd.Series(display_extreme),
338
+ 'Capacity Mismatch Flag': pd.Series(display_mismatch)
339
+ })
340
 
341
+ st.dataframe(metrics_df.style.format({
342
+ 'Missing Values (%)': '{:.2f}',
343
+ 'Extreme/Nonsensical Values (%)': '{}'
344
+ }))
345
+
346
+ st.write('<b><u>Missing values (%)</u></b>: Percentage of missing values in the dataset',unsafe_allow_html=True)
347
+ st.write('<b><u>Extreme/Nonsensical values (%)</u></b>: For Load, this is % of values below 0. For generation, it is negative or out-of-bound (> capacity).',unsafe_allow_html=True)
348
+ st.write('<b><u>Capacity Mismatch Flag</u></b>: Shows "🚩" if installed capacity is `NaN` or `0` but the dataset has non-zero generation. Blank otherwise. For Load columns, it is "-".',unsafe_allow_html=True)
349
 
350
  elif section == 'Forecasts Quality':
351
+
352
  st.header('Forecast Quality')
353
 
354
  # Time series for last 1 week
355
  last_week = data.loc[data.index >= (data.index[-1] - pd.Timedelta(days=7))]
356
  st.write('The below plot shows the time series of forecasts vs. observations provided by the ENTSO-E Transparency platform from the past week.')
357
+ variable_options, flagged_columns = filter_variable_options(last_week)
358
  # Dropdown to select the variable
359
  selected_variable = st.selectbox("Select Variable for Line Plot", list(variable_options.keys()))
 
 
360
  actual_col, forecast_col = variable_options[selected_variable]
361
 
362
+ x_vals = last_week.index.to_pydatetime().tolist()
363
+ y_actual = last_week[actual_col].tolist()
364
+ y_forecast = last_week[forecast_col].tolist()
365
+
366
+ # then plot
367
  fig = go.Figure()
368
+ fig.add_trace(go.Scatter(x=x_vals,y=y_actual,mode="lines",name="Actual"))
369
+ fig.add_trace(go.Scatter(x=x_vals,y=y_forecast,mode="lines",name="Forecast ENTSO-E"))
370
+ fig.update_layout(title=f"Forecasts vs Actual for {selected_variable}",xaxis_title="Date",yaxis_title="Value [MW]")
 
371
  st.plotly_chart(fig)
372
 
373
+
374
  # Scatter plots for error distribution
375
  st.subheader('Error Distribution')
376
  st.write('The below scatter plots show the error distribution of all fields: Solar, Wind and Load.')
 
379
  # Get the corresponding columns for the selected variable
380
  actual_col, forecast_col = variable_options[selected_variable]
381
 
382
+ if forecast_col in data.columns:
383
+ # grab the two series, drop any NaNs, and align on their common timestamps
384
+ obs = data[actual_col].dropna()
385
+ pred = data[forecast_col].dropna()
386
+ idx = obs.index.intersection(pred.index)
387
+ obs = obs.loc[idx]
388
+ pred = pred.loc[idx]
389
+
390
+ # convert to pure Python lists
391
+ x_vals = obs.tolist()
392
+ y_vals = pred.tolist()
393
+
394
+ fig = go.Figure()
395
+ fig.add_trace(go.Scatter(x=x_vals,y=y_vals,mode='markers',name=f'{selected_variable}'))
396
+ fig.update_layout(title=f'Error Distribution for {selected_variable}',xaxis_title='Observed [MW]',yaxis_title='Forecast ENTSO-E [MW]')
397
+
398
  st.plotly_chart(fig)
399
+
400
  st.subheader('Accuracy Metrics (Sorted by rMAE):')
401
 
402
  date_range = st.date_input(
 
410
  else:
411
  st.error("Please select a valid date range.")
412
  st.stop()
413
+ output_text = f"The below metrics are calculated from the selected date range from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}. On the right is a radar plot with the rMAE."
 
 
414
  st.write(output_text)
415
 
416
+ data_metrics = data.loc[start_date:end_date]
417
 
418
+ accuracy_metrics = pd.DataFrame(columns=['MAE', 'RMSE' ,'rMAE'], index=list(variable_options.keys()))
419
 
420
+ for variable in variable_options.keys():
421
+ actual_col, forecast_col = variable_options[variable]
422
+ obs = data_metrics[actual_col]
423
+ pred = data_metrics[forecast_col]
424
+ error = pred - obs
425
+
426
+ mae = round(np.mean(np.abs(error)),2)
427
+ if 'Load' in actual_col:
428
+ persistence = obs.shift(168) # Weekly persistence
429
+ else:
430
+ persistence = obs.shift(24) # Daily persistence
431
+
432
+ # Using the whole year's data for rMAE calculations
433
+ rmae = round(mae / np.mean(np.abs(obs - persistence)),2)
434
+ rmse = round(np.sqrt(np.mean((error)**2)), 2)
435
+ row_label = variable #'Load' if 'Load' in actual_col else 'Solar' if 'Solar' in actual_col else 'Wind Offshore' if 'Wind_offshore' in actual_col else 'Wind Onshore'
436
+ accuracy_metrics.loc[row_label] = [mae, rmse, rmae]
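# Illustrative sketch, not part of this commit: rMAE divides the forecast MAE by the MAE
# of a naive persistence baseline (same hour one week earlier for load, one day earlier
# otherwise), so values below 1 mean the ENTSO-E forecast beats persistence. Toy numbers:
_idx = pd.date_range('2024-01-01', periods=48, freq='h')
_obs = pd.Series(500 + 100 * np.sin(2 * np.pi * np.arange(48) / 24) + np.arange(48), index=_idx)
_pred = _obs + np.random.default_rng(0).normal(0, 5, size=48)   # hypothetical forecast
_mae = np.mean(np.abs(_pred - _obs))
_rmae = _mae / np.mean(np.abs(_obs - _obs.shift(24)))           # daily persistence baseline
# _mae is roughly 4 MW and the persistence MAE is exactly 24 MW here, so _rmae is about 0.17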
 
 
437
 
438
  accuracy_metrics.dropna(how='all', inplace=True)# Sort by rMAE (second column)
439
+ accuracy_metrics.sort_values(by=accuracy_metrics.columns[-1], ascending=True, inplace=True)
440
  accuracy_metrics = accuracy_metrics.round(4)
441
 
442
+ col1, col2 = st.columns([1, 1])
443
 
444
  with col1:
445
+ # (optional) some top-margin before the table
446
  st.markdown(
447
  """
448
  <style>
449
+ .small-chart-container {
450
+ margin-top: 0px;
451
  }
452
  </style>
453
  """,
454
  unsafe_allow_html=True
455
  )
456
  st.dataframe(accuracy_metrics)
457
 
458
  with col2:
459
+ # prepare the data
 
460
  rmae_values = accuracy_metrics['rMAE'].tolist()
461
+ categories = accuracy_metrics.index.tolist()
462
+
463
+ # build the radar
464
+ fig = go.Figure(
465
+ go.Scatterpolar(
466
+ r=rmae_values,
467
+ theta=categories,
468
+ fill='toself',
469
+ name='rMAE'
470
+ )
471
+ )
472
+
473
+ # 👉 shrink the total size, and give extra left/right margin for your labels
474
  fig.update_layout(
475
+ width=300, # make the whole plot a bit smaller
476
+ height=300,
477
+ margin=dict(
478
+ l=50, # more space on the left for long category names
479
+ r=60, # and on the right, if needed
480
+ t=20,
481
+ b=20
482
+ ),
483
  polar=dict(
484
+ angularaxis=dict(
485
+ tickfont=dict(size=11) # if you want slightly smaller ticks
486
+ ),
487
  radialaxis=dict(
488
  visible=True,
489
+ range=[0, max(rmae_values)*1.2]
490
+ )
491
+ ),
492
  showlegend=False
493
  )
494
+
495
+ # wrap in a div so you can still control vertical spacing via CSS
496
+ st.markdown('<div class="small-chart-container">', unsafe_allow_html=True)
497
+ st.plotly_chart(fig, use_container_width=False)
498
+ st.markdown('</div>', unsafe_allow_html=True)
499
 
500
  st.subheader('ACF plots of Errors')
501
  st.write('The below plots show the ACF (Auto-Correlation Function) for the errors of all three data fields obtained from ENTSO-E: Solar, Wind and Load.')
 
519
 
520
  # Optionally calculate and store ACF values for further analysis if needed
521
  acf_values = acf(error.dropna(), nlags=240)
522
+
523
  elif section == 'Insights':
524
  st.header("Insights")
525
 
 
531
 
532
  # Resample data based on the selected resolution
533
  if resolution == 'Hourly':
534
+ resampled_data = data
535
  elif resolution == 'Daily':
536
+ resampled_data = data.resample('D').mean() # Resample to daily mean
537
538
 
539
+ resampled_data.columns = [col.replace('_entsoe', '').replace('_', ' ') for col in resampled_data.columns]
 
540
 
541
  # Drop missing values
542
+ selected_df = resampled_data.dropna()
543
 
544
  # Create the scatter plots using seaborn's pairplot
545
  sns.set_theme(style="ticks")
 
550
 
551
  elif selected_country == 'Overall':
552
553
  def calculate_net_load_error(df, country_code):
554
+ #filter_df = df.dropna()
555
+ filter_df = df.dropna(axis=1, how='all')
556
+ filter_df = filter_df.dropna()
557
+
558
+ if filter_df.empty:
559
+ # Return something (e.g., None) if there's no data left
560
+ print(country_code)
561
+ return None, None
562
+ net_load = filter_df['Load_entsoe'].copy()
563
+ for col in ['Wind_onshore_entsoe', 'Solar_entsoe', 'Wind_offshore_entsoe']:
564
+ if col in filter_df.columns:
565
+ net_load -= filter_df[col]
566
+
567
+ net_load_forecast = filter_df['Load_forecast_entsoe'].copy()
568
+ for col in ['Wind_onshore_forecast_entsoe', 'Solar_forecast_entsoe', 'Wind_offshore_forecast_entsoe']:
569
+ if col in filter_df.columns:
570
+ net_load_forecast -= filter_df[col]
571
  # Calculate the error based on the latest values
572
  error = (net_load_forecast - net_load).iloc[-1]
573
  date = filter_df.index[-1].strftime("%Y-%m-%d %H:%M") # Get the latest date in string format
 
575
  return error, date
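# Illustrative sketch, not part of this commit: net load is demand minus variable
# renewables, and the map colours each zone by the forecast error of that quantity at
# the latest timestamp. Toy numbers, with 'XX' as a placeholder zone code.
_row = pd.DataFrame({
    'Load_entsoe': [9500.0], 'Load_forecast_entsoe': [9400.0],
    'Wind_onshore_entsoe': [1200.0], 'Wind_onshore_forecast_entsoe': [1300.0],
    'Solar_entsoe': [800.0], 'Solar_forecast_entsoe': [700.0],
}, index=[pd.Timestamp('2025-01-01 12:00')])
_error, _date = calculate_net_load_error(_row, 'XX')
# net load = 9500 - 1200 - 800 = 7500 ; forecast net load = 9400 - 1300 - 700 = 7400
# _error -> -100.0 (forecast underestimates net load by 100 MW), _date -> '2025-01-01 12:00'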
576
 
577
  def plot_net_load_error_map(data_dict):
578
+ # 1) compute your errors as before
579
+ missing_zones={'ME','IE_SEM','MK','CY','BA','AL','XK'}
580
+ net_load_errors = {
581
+ country_code: calculate_net_load_error(data, country_code)
582
+ for country_code, data in data_dict.items()
583
+ }
584
  df_net_load_error = pd.DataFrame({
585
+ "zoneName": list(net_load_errors),
586
+ "net_load_error": [v[0] for v in net_load_errors.values()],
587
+ "date": [v[1] for v in net_load_errors.values()],
588
  })
589
 
590
+ # 2) split your zones into standard vs. fallback
591
+ selected = list(data_dict.keys())
592
+ standard_zones = [z for z in selected if z not in missing_zones]
593
+ fallback_zones = [z for z in selected if z in missing_zones]
594
+
595
+ # 3a) load the standard ones with entsoe.load_zones
596
  date = pd.Timestamp.now()
597
+ geo_std = load_zones(standard_zones, date).reset_index()
598
 
599
+ # 3b) manually load the fallback ones
600
+ gdfs = []
601
+ for z in fallback_zones:
602
+ fn = f"{z}.geojson"
603
+ path = f'./geojson_missing/{fn}'
604
+ g = gpd.read_file(path)
605
+ g['zoneName'] = z
606
+ gdfs.append(g)
607
608
 
609
+ geo_fb = pd.concat(gdfs, ignore_index=True) if gdfs else gpd.GeoDataFrame()
610
+
611
+ # 4) combine
612
+ geo_data = pd.concat([geo_std, geo_fb], ignore_index=True)
613
  # Merge net_load_error and date into geo_data
614
  geo_data = geo_data.merge(df_net_load_error, on='zoneName', how='left')
615
 
 
646
  geo_data,
647
  style_function=style_function,
648
  tooltip=folium.GeoJsonTooltip(
649
+ fields=["zoneName", "net_load_error", "date"],
650
  aliases=["Country:", "Net Load Error [MW]:", "Date:"],
651
  localize=True
652
  )
 
656
  colormap.add_to(m)
657
 
658
  # Display the map
659
+ _=st_folium(m, width=700, height=600)
660
 
661
  def calculate_mae(actual, forecast):
662
  return np.mean(np.abs(actual - forecast))
 
664
  def calculate_persistence_mae(data, shift_hours):
665
  return np.mean(np.abs(data - data.shift(shift_hours)))
666
 
667
+ def calculate_rmae_for_country(df, variable_options):
668
  rmae = {}
669
  rmae['Load'] = calculate_mae(df['Load_entsoe'], df['Load_forecast_entsoe']) / calculate_persistence_mae(df['Load_entsoe'], 168)
670
+
671
+ for variable in variable_options.keys():
672
+ actual_col, forecast_col = variable_options[variable]
673
+ rmae[variable] = calculate_mae(df[actual_col], df[forecast_col]) / calculate_persistence_mae(df[actual_col], 24)
 
 
675
+ all_opt = ["Load", "Solar", "Wind Onshore", "Wind Offshore"]
676
+ not_in_list2 = [elem for elem in all_opt if elem not in variable_options.keys()]
677
+
678
+ for ele in not_in_list2:
679
+ rmae[ele] = None
680
+
681
  return rmae
682
 
683
  def create_rmae_dataframe(data_dict):
684
 
685
+ rmae_values = {'Country': [], 'Load': [], 'Wind Onshore': [], 'Wind Offshore': [], 'Solar': []}
686
 
687
  for country_name, df in data_dict.items():
688
+ df_filtered = df.dropna()
689
+ print(country_name)
690
+ variable_options, flagged_columns = filter_variable_options(df_filtered)
691
+ rmae = calculate_rmae_for_country(df_filtered, variable_options)
692
 
693
  rmae_values['Country'].append(country_name)
 
 
 
694
 
695
+ for var, met in rmae.items():
696
+ rmae_values[var].append(met)
 
 
 
697
 
698
  return pd.DataFrame(rmae_values)
699
 
 
701
  fig = go.Figure()
702
 
703
  # Dynamically adjust angles to exclude Wind_offshore if all values are NaN
704
+ angles = ['Load']
705
+ if not rmae_df['Wind Offshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
706
+ angles.append('Wind Offshore')
707
+ if not rmae_df['Wind Onshore'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
708
+ angles.append('Wind Onshore')
709
+ if not rmae_df['Solar'].isna().all(): # Only include Wind_offshore if it's not NaN for all countries
710
+ angles.append('Solar')
711
+
712
  for _, row in rmae_df.iterrows():
713
  fig.add_trace(go.Scatterpolar(
714
  r=[row[angle] for angle in angles],
 
748
 
749
  # Plot radar chart for the selected countries
750
  plot_rmae_radar_chart(filtered_rmae_df)