import pandas as pd import re class Janitor: def __init__(self): pass def clean_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: # Apply all cleaning procedure in sequence df = df.copy() # First make a copy to preserve integrity of the old df df = self.drop_duplicates(df) df = self.snake_case_columns(df) df = self.fix_none(df) df = self.fix_datatypes(df) df = self.dropna_target(df) df = df.reset_index(drop=True) # Fix index return df def drop_duplicates(self, df): return df.drop_duplicates() if df.duplicated().sum() > 0 else df def snake_case_columns(self, df): pattern = r'(?