# dps_challenge_api/utils/ProcessingClass.py
# architojha's picture
# updating api
# f638bd7
import pandas as pd
class PreProcessingClass:
    """Turn one observation into the one-hot encoded frame a model consumes.

    The four raw values are stored as a one-row DataFrame with the columns
    ``MONATSZAHL``, ``AUSPRAEGUNG``, ``JAHR`` and ``MONAT``.  The helper
    methods then replace the month code by a month name and one-hot encode
    all four columns with the supplied encoder.
    """

    # Two-digit month suffix of a MONAT value -> English month name.
    _MONTH_NAMES = {
        '01': 'January',
        '02': 'February',
        '03': 'March',
        '04': 'April',
        '05': 'May',
        '06': 'June',
        '07': 'July',
        '08': 'August',
        '09': 'September',
        '10': 'October',
        '11': 'November',
        '12': 'December',
    }

    # Columns handed to the encoder, in the order the encoder receives them.
    _ENCODED_COLUMNS = ('MONATSZAHL', 'AUSPRAEGUNG', 'JAHR', 'MONAT')

    def __init__(self, MONATSZAHL, AUSPRAEGUNG, JAHR, MONAT, encoder):
        """Store the raw values as a one-row DataFrame and keep the encoder.

        Args:
            MONATSZAHL: Value for the ``MONATSZAHL`` column.
            AUSPRAEGUNG: Value for the ``AUSPRAEGUNG`` column.
            JAHR: Value for the ``JAHR`` column.
            MONAT: Value for the ``MONAT`` column.
            encoder: Object providing ``transform(frame)`` and
                ``get_feature_names_out(columns)`` (presumably an already
                fitted scikit-learn ``OneHotEncoder`` -- confirm with the
                caller).
        """
        self.parent_df = pd.DataFrame({
            'MONATSZAHL': [MONATSZAHL],
            'AUSPRAEGUNG': [AUSPRAEGUNG],
            'JAHR': [JAHR],
            'MONAT': [MONAT],
        })
        self.encoder = encoder

    def _convert_date(self, column_name='MONAT', special_value='Summe'):
        """Return a copy of ``parent_df`` with month codes turned into names.

        Everything after the first four characters of a value is looked up
        as a two-digit month, so values are expected to look like
        ``'202101'``.  Values equal to ``special_value`` are kept unchanged.
        ``parent_df`` itself is not modified.

        Args:
            column_name: Column holding the month codes.
            special_value: Marker value that is passed through untouched.

        Returns:
            A new DataFrame with ``column_name`` converted.

        Raises:
            KeyError: If the month suffix is not one of ``'01'`` .. ``'12'``.
        """
        month_names = self._MONTH_NAMES

        def to_month_name(value):
            if value == special_value:
                return value
            # str() lets integer codes such as 202101 be sliced like the
            # string form instead of failing with a TypeError.
            return month_names[str(value)[4:]]

        converted = self.parent_df.copy()
        converted[column_name] = converted[column_name].apply(to_month_name)
        return converted

    def _one_hot(self, data):
        """Replace the four categorical columns by their encoded columns.

        Args:
            data: DataFrame containing the columns ``MONATSZAHL``,
                ``AUSPRAEGUNG``, ``JAHR`` and ``MONAT``.

        Returns:
            A new DataFrame holding the remaining columns of ``data``
            followed by the encoded columns, named by the encoder's
            ``get_feature_names_out`` and sharing ``data``'s index.
        """
        columns_to_encode = list(self._ENCODED_COLUMNS)

        encoded = self.encoder.transform(data[columns_to_encode])
        # An encoder may hand back a sparse matrix; the DataFrame
        # constructor needs a plain 2-D array, so densify anything that
        # offers toarray().
        if hasattr(encoded, 'toarray'):
            encoded = encoded.toarray()

        feature_names = self.encoder.get_feature_names_out(columns_to_encode)
        encoded_df = pd.DataFrame(
            encoded, columns=feature_names, index=data.index
        )

        remaining = data.drop(columns=columns_to_encode)
        return pd.concat([remaining, encoded_df], axis=1)