File size: 8,755 Bytes
e67fcfa
 
 
 
 
 
 
c58f85c
 
 
2c9db99
 
 
 
 
 
 
 
 
 
 
 
 
 
c58f85c
2c9db99
c58f85c
 
 
e67fcfa
 
 
 
 
 
 
 
 
 
 
 
 
2c9db99
 
 
 
e67fcfa
 
 
 
 
 
 
 
 
 
3b1c0d9
e67fcfa
 
 
 
 
 
 
 
 
 
 
2c9db99
e67fcfa
 
 
3b1c0d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c9db99
3b1c0d9
 
2c9db99
3b1c0d9
2c9db99
3b1c0d9
 
 
 
 
2c9db99
3b1c0d9
 
2c9db99
3b1c0d9
e67fcfa
 
 
 
 
2c9db99
 
e67fcfa
2c9db99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e67fcfa
2c9db99
 
 
 
 
 
 
 
3b1c0d9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import streamlit as st
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from io import BytesIO


@st.cache_data
def load_data_predictions():
    """Load the forecast table from ``Predictions.csv``.

    The raw column codes are replaced by the display names used throughout
    the app, and the ``Date`` column is parsed as day-first datetimes.

    Returns:
        A ``(df, df_filtered)`` tuple: the full table, and the same table
        without the rows whose ``Real Price`` is missing.
    """
    # Raw CSV column code -> name shown in the UI
    display_names = {
        'Price': 'Real Price',
        'DNN1': 'Neural Network 1',
        'DNN2': 'Neural Network 2',
        'DNN3': 'Neural Network 3',
        'DNN4': 'Neural Network 4',
        'DNN_Ensemble': 'Neural Network Ensemble',
        'LEAR56': 'Regularized Linear Model CW56',
        'LEAR84': 'Regularized Linear Model CW84',
        'LEAR112': 'Regularized Linear Model CW112',
        'LEAR730': 'Regularized Linear Model CW730',
        'LEAR_Ensemble': 'Regularized Linear Model Ensemble',
        'Persis': 'Persistence Model',
    }

    predictions = pd.read_csv('Predictions.csv').rename(columns=display_names)
    predictions['Date'] = pd.to_datetime(predictions['Date'], dayfirst=True)

    # Rows without a real price cannot be used for accuracy metrics
    with_real_price = predictions.dropna(subset=['Real Price'])
    return predictions, with_real_price

df, df_filtered = load_data_predictions()

# First and last dates for which a real price is available; used as the
# default metrics range and as the scatter-plot range.
observed_dates = df_filtered['Date']
min_date_allowed_pred = observed_dates.min().date()
max_date_allowed_pred = observed_dates.max().date()

# Line-chart window: starts seven days before the last date in the full table
end_date = df['Date'].max().date()
start_date = end_date - pd.Timedelta(days=7)

st.title("Belgium: Electricity Price Forecasting")

# Display names of the forecasting models. Both sidebar selectors are built
# from this single list so their options cannot drift apart.
model_options = [
    'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble',
    'Regularized Linear Model CW56', 'Regularized Linear Model CW84', 'Regularized Linear Model CW112',
    'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble',
    'Persistence Model',
]

# Sidebar for inputs
with st.sidebar:
    st.write("### Variables Selection for Graph")
    st.write("Select which variables you'd like to include in the graph. This will affect the displayed charts and available data for download.")
    selected_variables = st.multiselect(
        "Select variables to display:",
        options=['Real Price'] + model_options,
        default=['Real Price', 'Neural Network Ensemble', 'Regularized Linear Model Ensemble', 'Persistence Model'],
    )
    st.write("### Model Selection for Scatter Plot")
    # index=0 preselects the first entry of model_options
    model_selection = st.selectbox("Select which model's predictions to display:", options=model_options, index=0)

    st.write("### Date Range for Metrics Calculation")
    st.write("Select the date range to calculate the metrics for the predictions. This will influence the accuracy metrics displayed below. The complete dataset ranges from 10/03/2024 until today.")
    date_selection = st.date_input("Select Date Range for Metrics Calculation:", [min_date_allowed_pred, max_date_allowed_pred])

    # In range mode the widget returns fewer than two dates while the user is
    # still picking the range; unpacking it directly would raise ValueError.
    # Fall back to the full available range until the selection is complete.
    if isinstance(date_selection, (list, tuple)) and len(date_selection) == 2:
        start_date_pred, end_date_pred = date_selection
    else:
        st.info("Select an end date to complete the range. The full available range is used until then.")
        start_date_pred, end_date_pred = min_date_allowed_pred, max_date_allowed_pred

# Main content: line chart of the selected series
if not selected_variables:
    st.warning("Please select at least one variable to display.")
else:
    st.write("## Belgian Day-Ahead Electricity Prices")
    # Only a lower bound is applied: `end_date` is the last date in `df`,
    # so every row from `start_date` onwards is already within the window.
    temp_df = df[df['Date'] >= pd.Timestamp(start_date)]
    fig = go.Figure()

    # One line per selected column; the column name doubles as the legend label
    for variable in selected_variables:
        fig.add_trace(go.Scatter(x=temp_df['Date'], y=temp_df[variable], mode='lines', name=variable))

    fig.update_layout(xaxis_title="Date", yaxis_title="Price [EUR/MWh]")
    st.plotly_chart(fig, use_container_width=True)
    st.write("The graph presented here illustrates the day-ahead electricity price forecasts for Belgium, covering the period from one week ago up to tomorrow. It incorporates predictions from three distinct models: DNN (Deep Neural Networks), LEAR (Lasso Estimated AutoRegressive), and Persistence, alongside the actual electricity prices up until today.")


# Scatter plot of the real price against the model chosen in the sidebar.
# It depends only on `model_selection`, so it is not gated on the line-chart
# variable selection (which previously showed the same warning a second time).
st.write("## Scatter Plot: Real Price vs Model Predictions")

model_column = model_selection

# Keep the period for which real prices exist, then drop rows missing either
# value: np.polyfit cannot produce a usable fit from NaN inputs.
in_range = (df['Date'] >= pd.Timestamp(min_date_allowed_pred)) & (df['Date'] <= pd.Timestamp(max_date_allowed_pred))
plot_df = df[in_range].dropna(subset=['Real Price', model_column])

# A straight-line fit needs at least two distinct x values
if plot_df['Real Price'].nunique() < 2:
    st.warning(f"Not enough data to draw the scatter plot for {model_selection}.")
else:
    # Create the scatter plot
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=plot_df[model_column], mode='markers', name=f"Real Price vs {model_selection} Predictions"))

    # Least-squares line of best fit (degree-1 polynomial): slope m, intercept b
    m, b = np.polyfit(plot_df['Real Price'], plot_df[model_column], 1)
    regression_line = m * plot_df['Real Price'] + b

    # The fitted equation is shown as the legend entry of the line
    equation = f"y = {m:.2f}x + {b:.2f}"
    fig.add_trace(go.Scatter(x=plot_df['Real Price'], y=regression_line, mode='lines', name=equation, line=dict(color='black')))

    # Update layout with appropriate titles
    fig.update_layout(xaxis_title="Real Price [EUR/MWh]", yaxis_title=f"{model_selection} Predictions [EUR/MWh]", title=f"Scatter Plot of Real Price vs {model_selection} Predictions from {min_date_allowed_pred} to {max_date_allowed_pred}")
    st.plotly_chart(fig, use_container_width=True)


# Calculating and displaying metrics
if start_date_pred and end_date_pred:
    st.header("Accuracy Metrics")
    st.write(f"The accuracy metrics are calculated from **{start_date_pred}** to **{end_date_pred}**. This interval can be changed in the sidebar.. Evaluate the forecasting accuracy of our models with key performance indicators. The table summarizes the Mean Absolute Error (MAE), Symmetric Mean Absolute Percentage Error (SMAPE), and Root Mean Square Error (RMSE) for the selected models over your selected date range. Lower values indicate higher precision and reliability of the forecasts.")
    filtered_df = df_filtered[(df_filtered['Date'] >= pd.Timestamp(start_date_pred)) & (df_filtered['Date'] <= pd.Timestamp(end_date_pred))]

    if filtered_df.empty:
        # Without rows every metric below evaluates to NaN; tell the user why
        st.warning("No data with real prices in the selected date range; the metrics cannot be computed.")

    # Columns of filtered_df that hold model predictions
    models = [
        'Neural Network 1', 'Neural Network 2', 'Neural Network 3', 'Neural Network 4', 'Neural Network Ensemble',
        'Regularized Linear Model CW56', 'Regularized Linear Model CW84', 'Regularized Linear Model CW112', 'Regularized Linear Model CW730', 'Regularized Linear Model Ensemble',
        'Persistence Model'
    ]

    # One entry per model is added below, each listing values in this metric order
    results = {'Metric': ['MAE', 'sMAPE', 'RMSE', 'rMAE']}

    p_real = filtered_df['Real Price']

    # Denominator of rMAE: the MAE of the persistence model. It does not
    # depend on the model being evaluated, so compute it once.
    persistence_mae = np.mean(np.abs(p_real - filtered_df['Persistence Model']))

    # Iterate through each model to calculate and store metrics
    for model in models:
        p_pred = filtered_df[model]
        abs_error = np.abs(p_real - p_pred)

        mae = np.mean(abs_error)
        smape = 100 * np.mean(abs_error / ((np.abs(p_real) + np.abs(p_pred)) / 2))
        rmse = np.sqrt(np.mean((p_real - p_pred) ** 2))
        rmae = mae / persistence_mae

        # Store the results as display strings
        results[model] = [f"{mae:.2f}", f"{smape:.2f}%", f"{rmse:.2f}", f"{rmae:.2f}"]

    # Transpose so that each model is a row and each metric a column
    metrics_df = pd.DataFrame(results)
    transposed_metrics_df = metrics_df.set_index('Metric').T
    col1, col2 = st.columns([3, 2])

    with col1:
        st.dataframe(transposed_metrics_df,  hide_index=False)

    # Kept inside the `if`: col2 only exists when this branch runs, so a
    # module-level `with col2:` would raise NameError otherwise.
    with col2:
        st.markdown("""
        <style>
        .big-font {
            font-size: 20px;
            font-weight: 500;
        }
        </style>
        <div class="big-font">
        Equations
        </div>
        """, unsafe_allow_html=True)

        # Rendering LaTeX equations (string content kept at its original indentation)
        st.markdown(r"""
    $\text{MAE} = \frac{1}{n}\sum_{i=1}^{n}|y_i - \hat{y}_i|$
    

    $\text{sMAPE} =100\frac{1}{n} \sum_{i=1}^{n} \frac{|y_i - \hat{y}_i|}{\left(|y_i| + |\hat{y}_i|\right)/2}$
    
                
    $\text{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}$
    
                
    $\text{rMAE} = \frac{\text{MAE}}{MAE_{\text{Persistence Model}}}$
    """)


# Closing section: explains how to request the predictions (contact details only)
access_heading = "## Access Predictions"
access_contact = "If you are interested in accessing the predictions made by the models, please contact Margarida Mascarenhas (KU Leuven PhD Student) at [email protected]"
for section_text in (access_heading, access_contact):
    st.write(section_text)