|
|
|
import pandas as pd |
|
import tomllib |
|
|
|
value_mapping = { |
|
'his_SEX': {'female': 0, 'male': 1}, |
|
'his_HISPANIC': {'no': 0, 'yes': 1}, |
|
'his_NACCNIHR': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'mul': 5}, |
|
'his_RACE': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'oth': 5}, |
|
'his_RACESEC': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'oth': 5}, |
|
'his_RACETER': {'whi': 0, 'blk': 1, 'asi': 2, 'ind': 3, 'haw': 4, 'oth': 5}, |
|
} |
|
|
|
label_names = ['NC', 'MCI', 'DE', 'AD', 'LBD', 'VD', 'PRD', 'FTD', 'NPH', 'SEF', 'PSY', 'TBI', 'ODE'] |
|
|
|
class CSVDataset: |
|
|
|
def __init__(self, dat_file, cnf_file): |
|
''' ... ''' |
|
|
|
df = pd.read_csv(dat_file) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cnf = pd.read_csv(cnf_file) |
|
feature_names = [col for col in list(cnf['Name']) if col not in label_names] |
|
|
|
self.df = df |
|
self.df_features = df[feature_names] |
|
self.df_labels = df[label_names] |
|
|
|
def __len__(self): |
|
''' ... ''' |
|
return len(self.df) |
|
|
|
def __getitem__(self, idx): |
|
''' ... ''' |
|
row = self.df_features.iloc[idx] |
|
clean_row = row.dropna() |
|
feature_dict = clean_row.to_dict() |
|
|
|
row = self.df_labels.iloc[idx] |
|
clean_row = row.dropna() |
|
label_dict = clean_row.to_dict() |
|
|
|
return feature_dict, label_dict |
|
|
|
if __name__ == '__main__': |
|
|
|
dset = CSVDataset( |
|
dat_file = "./test.csv", |
|
cnf_file = "./input_meta_info.csv" |
|
) |
|
print(dset[1]) |
|
|
|
|
|
|