Zack committed on
Commit
747accb
·
1 Parent(s): 0702187

fix: Possible fix to missing values

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -61,7 +61,7 @@ def plot_anomalies(df_test_value, data, anomalies):
61
 
62
  def clean_data(df):
63
  # Drop rows with any null data
64
- df = df.dropna()
65
 
66
  # Check if the DataFrame already contains the correct columns
67
  if "timestamp" in df.columns and "value" in df.columns:
@@ -83,12 +83,12 @@ def clean_data(df):
83
  # Rename column
84
  df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
85
 
86
- elif "Date_CY" in df.columns and "Hour" in df.columns and "Net_Sales_CY" in df.columns:
87
  # Convert "Date_CY" and "Hour" columns into datetime format
88
  df["timestamp"] = pd.to_datetime(df["Date_CY"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
89
 
90
  # Handle the case where hour is 24
91
- df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] + pd.DateOffset(days=1)
92
  df["timestamp"] = df["timestamp"].dt.floor('h')
93
 
94
  # Keep only necessary columns
@@ -97,6 +97,9 @@ def clean_data(df):
97
  # Rename column
98
  df.rename(columns={"Net_Sales_CY": "value"}, inplace=True)
99
 
 
 
 
100
  return df
101
 
102
  else:
 
61
 
62
  def clean_data(df):
63
  # Drop rows with any null data
64
+ # df = df.dropna()
65
 
66
  # Check if the DataFrame already contains the correct columns
67
  if "timestamp" in df.columns and "value" in df.columns:
 
83
  # Rename column
84
  df.rename(columns={"Hourly_Labor_Hours_Total": "value"}, inplace=True)
85
 
86
+ elif "Date_CY" in df.columns and "Hour" in df.columns and "Net_Sales_CY" in df.columns:
87
  # Convert "Date_CY" and "Hour" columns into datetime format
88
  df["timestamp"] = pd.to_datetime(df["Date_CY"]) + pd.to_timedelta(df["Hour"].astype(int), unit='h')
89
 
90
  # Handle the case where hour is 24
91
+ df.loc[df["timestamp"].dt.hour == 24, "timestamp"] = df["timestamp"] - pd.DateOffset(days=1)
92
  df["timestamp"] = df["timestamp"].dt.floor('h')
93
 
94
  # Keep only necessary columns
 
97
  # Rename column
98
  df.rename(columns={"Net_Sales_CY": "value"}, inplace=True)
99
 
100
+ # Drop rows where 'value' is NaN
101
+ df = df.dropna(subset=['value'])
102
+
103
  return df
104
 
105
  else: