File size: 6,841 Bytes
61c386c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e795a76
 
61c386c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#Importierung Pakete
import gradio as gr
import pandas as pd #Verarbeitung excel/csv

from darts.models import LinearRegressionModel #Importierung Klasse LinearRegressionModel von darts
from darts import TimeSeries #In darts wird für Datensätze das TimeSeries Format verwendet

from sklearn.metrics import mean_absolute_error, r2_score

import os
import plotly.express as px #Erstellung von Diagrammen (dynamisch --> bspw. zoom)

# Funktion, um die hochgeladene Datei zu lesen
def upload_excel(file):
  if file is not None:
    # Excel-Datei lesen
    df = pd.read_excel(file.name, index_col='time')
    return df

# Funktion, um Checkbox-Optionen automatisch zu erstellen
def create_feature_checkbox(df):
  if df is not None:
    new_choices = list(df.columns)
    return gr.update(choices=new_choices)
  else:
    return gr.update(choices=[])

# Funktion, um Trainingsstartdatum automatisch zu laden
def fill_start_date(df):
  if df is not None:
    start_date = str(df.index.min())
    return gr.update(value=start_date)
  else:
    return gr.update(value='')

# Funktion, um Teststartdatum automatisch zu laden
def fill_end_date(df):
  if df is not None:
    end_date = str(df.index.max())
    return gr.update(value=end_date)
  else:
    return gr.update(value='')

#Funktion, läd die excel Datei hoch und führt außerdem die Funktionen create_feature_checkbox, fill_start_date, fill_end_date
def process_excel(file):
  df = upload_excel(file)
  return df, create_feature_checkbox(df), create_feature_checkbox(df), fill_start_date(df), fill_end_date(df) #create_feature_checkbox steht zwei mal drinnen wegen target und covariate

#Funktion training und training/test-split
def data_split_and_training(df,start_train,end_train,start_test,end_test,target,covariates,lags_target,lags_covariates):
  #Aufteilen in Train Test
  train_df = df[start_train:end_train]
  test_df = df[start_test:end_test]

  #Umwandeln in darts.TimeSeries
  train = TimeSeries.from_dataframe(train_df)
  test = TimeSeries.from_dataframe(test_df)

  #Lags Eingabe in Liste umwandeln für Darts Bibliothek
  lags_target_list = list(range(-1,-lags_target-1,-1))
  lags_covariates_list = list(range(-1,-lags_covariates-1,-1))

  #model instantiation
  model = LinearRegressionModel(lags=lags_target_list, lags_future_covariates=lags_covariates_list)

  #model training
  model_fit = model.fit(train[target], future_covariates= train[covariates])
  return train, test, model_fit, f'Training is complete'

# Funktion zur Speicherung des trainierten Modells
def create_file(state_value):
    file_path = "model_fit.pt"
    with open(file_path, "w") as f:
        f.write(f"State Value: {state_value}")
    return file_path

# Funktion für Prediction
def test(model_fit,test,train,covariates):
  #prediction
  prediction = model_fit.predict(n=len(test), future_covariates= train.append(test)[covariates])

  #converting to pandas
  df_prediction = prediction.pd_dataframe()

  # Visualisierung mit Plotly
  fig = px.line()
  fig.add_scatter(x=df_prediction.index, y=test.pd_dataframe()[df_prediction.columns[0]], mode='lines', name='test', line_color='red')
  fig.add_scatter(x=df_prediction.index, y=df_prediction[df_prediction.columns[0]], mode='lines', name='predict', line_color='blue')
  fig.update_yaxes(title_text=df_prediction.columns[0])

  #check metrics
  target_variable = []
  MAE = []
  R2 = []
  for i in df_prediction.columns:
    target_variable.append(i)
    MAE.append(mean_absolute_error(df_prediction[i], test.pd_dataframe()[i]))
    R2.append(r2_score(df_prediction[i], test.pd_dataframe()[i]))
  metrics = pd.DataFrame({'target':target_variable,'MAE': MAE, 'R2': R2})
  return metrics, fig



# Gradio-Interface erstellen
with gr.Blocks() as demo:
  gr.Markdown("## ML-based Building Simulation") #Überschrift
  gr.Markdown("This demo offers the possibility of creating building simulations based on time series data (measurement data with timestamps). The demo uses the machine learning algorithm multiple linear Regression. The target variables and additional external variables of the forecast can be selected based on the data.") #Beschreibung


  # Datei-Upload-Komponente
  file_input = gr.File(label="Drop an excel file", file_types=[".xls", ".xlsx"]) #Hochladebereich

  #Upload Button
  upload_button = gr.Button("Upload Excel") #Button für Funktion proceed_excel()

  #Feature checkboxen erstellen
  checkbox_group_target = gr.CheckboxGroup(label="Select target variable", choices=[], interactive=True) #Auswahl target variable
  checkbox_group_covariate = gr.CheckboxGroup(label="Select exogenous variables", choices=[], interactive=True) #Auswahl covariate variables

  with gr.Row():
    # Eingabe Teilung Training und Testdaten
    start_train = gr.Textbox(label='start_train:', lines=1, interactive=True)
    end_train = gr.Textbox(label='end_train:', lines=1, interactive=True, value = '2019-06-04 02:15:00')
    start_test = gr.Textbox(label='start_test:', lines=1, interactive=True, value = '2019-06-04 02:30:00')
    end_test = gr.Textbox(label='end_test:', lines=1, interactive=True)

  # Eingabe Verzögerung
  with gr.Row():
    lags_target = gr.Number(value=1, label='Number of laged values for target', interactive=True)
    lags_covariates = gr.Number(value=1, label='Number of laged values for exogenous variables', interactive=True)

  # DataFrame df wird als Zustand gespeichert, damit er in in anderen Funktionen weiter verwendet werden kann
  df_state = gr.State(None)
  train_state = gr.State(None)
  test_state = gr.State(None)
  model_fit_state = gr.State(None)

  #Training Button
  training_button = gr.Button("Training") #Button für Funktion data_split_and_training()

  with gr.Row():
    #Training Status
    status_output = gr.Textbox(label="Training status")
    with gr.Column():
      #Download Button
      download_button = gr.DownloadButton("Download model")
      test_button = gr.Button("Test")

  # Tabellenausgabe Metrik
  metrics_table = gr.DataFrame(label="Metrics", headers=['target','MAE','R2'])

  # Plotausgabe Vorhersagen
  plot = gr.Plot()

  # Event-Verknüpfungen
  upload_button.click(process_excel, inputs=file_input, outputs=[df_state, checkbox_group_target, checkbox_group_covariate, start_train, end_test])
  training_button.click(data_split_and_training,
                        inputs=[df_state, start_train, end_train, start_test, end_test, checkbox_group_target, checkbox_group_covariate, lags_target, lags_covariates],
                        outputs=[train_state, test_state, model_fit_state, status_output])
  download_button.click(create_file,inputs=[model_fit_state],outputs=[download_button])
  test_button.click(test,inputs=[model_fit_state,test_state,train_state,checkbox_group_covariate],outputs=[metrics_table,plot])

# Demo starten
demo.launch(show_error=True)