import pandas as pd class PreProcessingClass: def __init__(self, MONATSZAHL, AUSPRAEGUNG, JAHR, MONAT, encoder): self.parent_df = pd.DataFrame({ 'MONATSZAHL': [MONATSZAHL], 'AUSPRAEGUNG': [AUSPRAEGUNG], 'JAHR': [JAHR], 'MONAT': [MONAT] }) self.encoder = encoder def _convert_date(self, column_name='MONAT', special_value='Summe'): day_mapping = { '01': 'January', '02': 'February', '03': 'March', '04': 'April', '05': 'May', '06': 'June', '07': 'July', '08': 'August', '09': 'September', '10': 'October', '11': 'November', '12': 'December' } data_copy = self.parent_df.copy() data_copy[column_name] = data_copy[column_name].apply(lambda x: day_mapping[x[4:]] if x != special_value else x) return data_copy def _one_hot(self, data): columns_to_encode = ['MONATSZAHL', 'AUSPRAEGUNG', 'JAHR', 'MONAT'] data_copy = data.copy() encoded_columns = self.encoder.transform(data[columns_to_encode]) encoded_column_names = self.encoder.get_feature_names_out(columns_to_encode) encoded_df = pd.DataFrame(encoded_columns, columns=encoded_column_names, index=data.index) final_df = pd.concat([data.drop(columns=columns_to_encode),encoded_df], axis=1) return final_df