Zack committed on
Commit
05bf296
·
1 Parent(s): 4155129

fix: Add date.floor method to fix issues with invalid hr 24:00:00 data

Browse files
Files changed (1) hide show
  1. app.py +25 -24
app.py CHANGED
@@ -81,30 +81,31 @@ def clean_data(df):
81
  else:
82
  raise ValueError("Dataframe does not contain necessary columns.")
83
 
84
- def master(file):
85
- # read file
86
- data = pd.read_csv(file.name)
87
-
88
- # clean data
89
- data = clean_data(data)
90
-
91
- # Convert timestamp to datetime after cleaning
92
- data['timestamp'] = pd.to_datetime(data['timestamp'])
93
-
94
- data.set_index("timestamp", inplace=True)
95
-
96
- # Check if data has enough records to create sequences
97
- if len(data) < TIME_STEPS:
98
- return "Not enough data to create sequences. Need at least {} records.".format(TIME_STEPS)
99
-
100
- df_test_value = normalize_data(data)
101
- # plot input test data
102
- plot1 = plot_test_data(df_test_value)
103
- # predict
104
- anomalies = get_anomalies(df_test_value)
105
- #plot anomalous data points
106
- plot2 = plot_anomalies(df_test_value, data, anomalies)
107
- return plot2
 
108
 
109
  outputs = gr.outputs.Image()
110
 
 
81
  else:
82
  raise ValueError("Dataframe does not contain necessary columns.")
83
 
84
+ def clean_data(df):
85
+ # Check if the DataFrame already contains the correct columns
86
+ if "timestamp" in df.columns and "value" in df.columns:
87
+ df["timestamp"] = pd.to_datetime(df["timestamp"])
88
+ return df
89
+
90
+ # Check if DataFrame contains the columns to be converted
91
+ elif "Date" in df.columns and "Hour" in df.columns and "Hourly_Labor_Hours_Total" in df.columns:
92
+ # Convert "Date" and "Hour" columns into datetime format
93
+ df["timestamp"] = pd.to_datetime(df["Date"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
94
+
95
+ # Handle the case where hour is 24
96
+ df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] + pd.DateOffset(days=1)
97
+ df["timestamp"] = df["timestamp"].dt.floor('h')
98
+
99
+ # Keep only necessary columns
100
+ df = df[["timestamp", "Hourly_Labor_Hours_Total"]]
101
+
102
+ # Rename column
103
+ df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
104
+
105
+ return df
106
+
107
+ else:
108
+ raise ValueError("Dataframe does not contain necessary columns.")
109
 
110
  outputs = gr.outputs.Image()
111