import time

from utils.levels import complete_level, render_page, initialize_level
from utils.login import get_login, initialize_login

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import RendererAgg
import base64
from io import BytesIO
from PIL import Image, ImageFilter
import lightgbm as lgb

_lock = RendererAgg.lock

initialize_login()
initialize_level()

LEVEL = 3

File_PATH = 'datasets/Building_forcasting.csv'
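# CSV inputs are expected to carry a 'Timestamp' column plus the feature columns
# used in create_model_inputs(): 'PV_Output', 'Solar_Irradiance', 'Temperature',
# 'Rain_Fall' and 'Wind_speed'.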


def process_file(csv_file):
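    """Load the uploaded CSV into a DataFrame indexed by parsed 'Timestamp'
    values, filling missing entries with 0."""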
    data = pd.read_csv(csv_file, index_col='Timestamp')
    data.index = pd.to_datetime(data.index)
    data = data.fillna(0)
    return data


def model_predict(data, model_choice, train_size, tune_model):
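    """Train the chosen regressor ('LightGBM' or 'Random Forest') on lagged solar
    output features and return (y_test, y_pred, fitted_model). When tune_model is
    True, hyperparameters are searched with RandomizedSearchCV over the grids
    from tuned_parameters()."""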
    if model_choice == 'LightGBM':
        model = lgb.LGBMRegressor()
        param_grid = tuned_parameters('lgbm')
    elif model_choice == 'Random Forest':
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        param_grid = tuned_parameters('rf')

    # tuned_parameters() returns grids of candidate values, so they are searched
    # with RandomizedSearchCV rather than passed straight to the estimator.
    if tune_model:
        model = RandomizedSearchCV(model, param_distributions=param_grid, random_state=42, n_jobs=-1)

    X, y = create_model_inputs(data, 288, 288)

    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size / 100, random_state=42, shuffle=False)

    model.fit(X_train, y_train)

    # Hand back the refitted best estimator so callers always get a plain regressor
    # (e.g. one exposing feature_importances_), even when tuning was enabled.
    if tune_model:
        model = model.best_estimator_

    y_pred = model.predict(X_test)

    return y_test, y_pred, model


def create_model_inputs(data, lag, mean_period):
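    """Build the feature matrix X (weather features plus a lagged and a
    rolling-mean PV output) and the target y ('PV_Output')."""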
    df_processed = data.copy()
    df_processed['PV_Output_lag'] = df_processed['PV_Output'].shift(lag)
    df_processed['PV_Output_mean'] = df_processed['PV_Output'].rolling(window=mean_period).mean()

    X = df_processed[['Solar_Irradiance', 'Temperature', 'Rain_Fall', 'Wind_speed', 'PV_Output_lag', 'PV_Output_mean']].dropna()
    y = df_processed[['PV_Output']].loc[X.index]

    return X, y


def show_output(y_test, y_pred):
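    """Report the test R2 score in the sidebar and plot predicted vs. actual
    solar power generation (in kW) on three stacked axes. Returns the figure."""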
    st.sidebar.subheader("Model Performance")
    st.sidebar.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")

    fig, axs = plt.subplots(3, figsize=(12, 18))

    axs[0].plot(y_test.index, y_pred / 1000, label='Predicted')
    axs[0].plot(y_test.index, y_test['PV_Output'] / 1000, label='Actual')
    axs[0].legend()
    axs[0].set_title('Prediction vs Actual (Solar Power Generation)')
    axs[0].set_xlabel('Date')
    axs[0].set_ylabel('Solar Power Generation (kW)')

    axs[1].plot(y_test.index, y_pred / 1000, label='Predicted')
    axs[1].set_title('Predicted Solar Power Generation')
    axs[1].set_xlabel('Date')
    axs[1].set_ylabel('Solar Power Generation (kW)')

    axs[2].plot(y_test.index, y_test['PV_Output'] / 1000, label='Actual')
    axs[2].set_title('Actual Solar Power Generation')
    axs[2].set_xlabel('Date')
    axs[2].set_ylabel('Solar Power Generation (kW)')

    fig.tight_layout()
    with _lock:
        st.pyplot(fig)

    return fig


def download_link(y_test, y_pred):
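    """Add a sidebar link to download predicted vs. actual power values as a CSV file."""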
    y_pred_df = pd.DataFrame({'Timestamp': y_test.index, 'Predicted_Power': y_pred, 'Actual_Total_Power_(kW)': y_test['PV_Output']})
    csv = y_pred_df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    href = f'<a href="data:file/csv;base64,{b64}" download="Predicted_Solar_Power.csv">Download Predicted Power CSV File</a>'
    st.sidebar.markdown(href, unsafe_allow_html=True)


def feature_importance_plot(model, feature_names):
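    """Plot the model's feature importances, normalised to percentages, as a
    bar chart and return the figure."""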
    importance = model.feature_importances_
    importance = 100.0 * (importance / importance.sum())

    plt.figure(figsize=(10, 6))
    plt.bar(feature_names, importance)
    plt.title('Feature Importance')
    plt.xlabel('Features')
    plt.ylabel('Importance (%)')
    return plt.gcf()


def download_plot(fig):
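    """Add a sidebar link to download the given figure as a PNG image."""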
    tmpfile = BytesIO()
    fig.savefig(tmpfile, format='png')
    encoded = base64.b64encode(tmpfile.getvalue()).decode('utf-8')

    href = f'<a href="data:image/png;base64,{encoded}" download="plot.png">Download Result Plot</a>'
    st.sidebar.markdown(href, unsafe_allow_html=True)


def tuned_parameters(model):
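    """Return the hyperparameter search grid for the given model key ('lgbm' or 'rf')."""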
    if model == 'lgbm':
        params = {
            'num_leaves': [10, 20, 30, 40, 50],
            'max_depth': [-1, 3, 5, 10],
            'learning_rate': [0.01, 0.05, 0.1],
            'n_estimators': [100, 500, 1000]
        }
        return params

    elif model == 'rf':
        params = {
            'n_estimators': [10, 100, 500, 1000],
            'max_depth': [None, 10, 20, 30, 40, 50],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }
        return params


def step3_page():
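    """Render the Level 3 'Training the Model' page: upload and preview a
    weather dataset, then mark the level as complete."""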
    st.header("Training the Model")
    st.subheader("Step 1: Data Collection")
    st.write("To begin training the solar power forecasting model, upload a relevant dataset of weather-related attributes in .csv format. The quality of this dataset is crucial for the model's training and accuracy.")

    col1 = st.columns([1])
    with col1[0]:
        csv_file = st.file_uploader("Upload CSV", type=['csv'])
        if csv_file is not None:
            data = process_file(csv_file)
            st.subheader("Let's display the uploaded dataset!")
            st.dataframe(data)
        else:
            st.error("Please upload a valid .csv file")
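    # The helpers above (model_predict, show_output, feature_importance_plot,
    # download_link, download_plot) are defined but not called on this page.
    # A minimal sketch of how they could be wired in, assuming sidebar controls
    # for the model choice, train split and tuning flag (labels and defaults
    # here are illustrative, not prescribed by this module):
    #
    #     if csv_file is not None:
    #         model_choice = st.sidebar.selectbox("Model", ['LightGBM', 'Random Forest'])
    #         train_size = st.sidebar.slider("Training set size (%)", 50, 95, 80)
    #         tune_model = st.sidebar.checkbox("Tune hyperparameters")
    #
    #         y_test, y_pred, model = model_predict(data, model_choice, train_size, tune_model)
    #         fig = show_output(y_test, y_pred)
    #         feature_names = ['Solar_Irradiance', 'Temperature', 'Rain_Fall',
    #                          'Wind_speed', 'PV_Output_lag', 'PV_Output_mean']
    #         with _lock:
    #             st.pyplot(feature_importance_plot(model, feature_names))
    #         download_link(y_test, y_pred)
    #         download_plot(fig)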

    if st.button("Complete"):
        complete_level(LEVEL)


render_page(step3_page, LEVEL)