Spaces:
Sleeping
Sleeping
import pandas as pd | |
class PreProcessingClass:
    """Turn one raw observation into encoder-ready features.

    The four input values are stored as a one-row DataFrame
    (``parent_df``). ``_convert_date`` rewrites the month column to English
    month names, and ``_one_hot`` swaps the categorical columns for the
    output of the supplied encoder.
    """

    # Columns handed to the encoder, in the order the encoder receives them.
    _ENCODED_COLUMNS = ['MONATSZAHL', 'AUSPRAEGUNG', 'JAHR', 'MONAT']

    # Two-character month suffix -> English month name.
    _MONTH_NAMES = {
        '01': 'January',
        '02': 'February',
        '03': 'March',
        '04': 'April',
        '05': 'May',
        '06': 'June',
        '07': 'July',
        '08': 'August',
        '09': 'September',
        '10': 'October',
        '11': 'November',
        '12': 'December',
    }

    def __init__(self, MONATSZAHL, AUSPRAEGUNG, JAHR, MONAT, encoder):
        """Store the observation as a one-row frame and keep the encoder.

        Args:
            MONATSZAHL: Value for the ``MONATSZAHL`` column.
            AUSPRAEGUNG: Value for the ``AUSPRAEGUNG`` column.
            JAHR: Value for the ``JAHR`` column.
            MONAT: Value for the ``MONAT`` column.
            encoder: Object providing ``transform`` and
                ``get_feature_names_out``. It is used without being fitted
                here, so presumably an already-fitted scikit-learn
                ``OneHotEncoder`` -- confirm against the caller.
        """
        self.parent_df = pd.DataFrame({
            'MONATSZAHL': [MONATSZAHL],
            'AUSPRAEGUNG': [AUSPRAEGUNG],
            'JAHR': [JAHR],
            'MONAT': [MONAT],
        })
        self.encoder = encoder

    def _convert_date(self, column_name='MONAT', special_value='Summe'):
        """Return a copy of ``parent_df`` with month codes as month names.

        Every value in ``column_name`` that is not equal to
        ``special_value`` is sliced from index 4 onward and looked up in the
        month table; ``special_value`` itself passes through unchanged.
        Presumably the values are ``'YYYYMM'`` strings -- confirm against
        the caller.

        Args:
            column_name: Column holding the month codes.
            special_value: Marker value that must not be converted.

        Returns:
            A new DataFrame; ``parent_df`` itself is not modified.

        Raises:
            KeyError: If the sliced suffix is not one of ``'01'``..``'12'``.
        """
        month_names = self._MONTH_NAMES

        def to_month_name(value):
            if value == special_value:
                return value
            return month_names[value[4:]]

        converted = self.parent_df.copy()
        converted[column_name] = converted[column_name].apply(to_month_name)
        return converted

    def _one_hot(self, data):
        """Replace the categorical columns of ``data`` with encoded columns.

        Args:
            data: DataFrame containing the ``MONATSZAHL``, ``AUSPRAEGUNG``,
                ``JAHR`` and ``MONAT`` columns.

        Returns:
            A new DataFrame made of the remaining columns of ``data``
            followed by the encoder's output columns, on the same index.
        """
        columns = self._ENCODED_COLUMNS
        encoded = self.encoder.transform(data[columns])
        # Encoders may return a sparse matrix, which the DataFrame
        # constructor cannot lay out against explicit column names;
        # convert to dense first. Dense arrays have no ``toarray`` and are
        # passed through untouched.
        if hasattr(encoded, 'toarray'):
            encoded = encoded.toarray()
        feature_names = self.encoder.get_feature_names_out(columns)
        encoded_df = pd.DataFrame(
            encoded, columns=feature_names, index=data.index
        )
        return pd.concat([data.drop(columns=columns), encoded_df], axis=1)