Spaces:

erasmopurif
/

FairUP

Runtime error

App Files Files Community

FairUP / src /aif360 /datasets /meps_dataset_panel19_fy2015.py

erasmopurif

First commit

d2a8669 almost 2 years ago

raw

history blame contribute delete

5.8 kB

	import os

	import pandas as pd

	from aif360.datasets import StandardDataset

	# Human-readable names for the encoded label values and the encoded
	# protected-attribute values; used as the default ``metadata`` of
	# MEPSDataset19 below.
	default_mappings = {
	    'label_maps': [
	        {1.0: '>= 10 Visits', 0.0: '< 10 Visits'},
	    ],
	    'protected_attribute_maps': [
	        {1.0: 'White', 0.0: 'Non-White'},
	    ],
	}

	def default_preprocessing(df):
	    """Prepare a raw MEPS frame for the Panel 19 utilization task.

	    Steps, applied in this order:

	    1. Replace ``RACEV2X`` with a ``RACE`` column that is ``'White'`` when
	       ``HISPANX == 2`` and ``RACEV2X == 1`` (non-Hispanic White) and
	       ``'Non-White'`` otherwise.
	    2. Keep only the rows with ``PANEL == 19``.
	    3. Rename the panel/round-specific columns to generic names
	       (e.g. ``AGE53X`` -> ``AGE``, ``POVCAT15`` -> ``POVCAT``).
	    4. Drop rows holding missing/unknown codes: negative ``REGION``,
	       ``AGE``, ``MARRY`` or ``ASTHDX``, and values below -1 in the other
	       categorical features.
	    5. Add up ``OBTOTV15``, ``OPTOTV15``, ``ERTOT15``, ``IPNGTD15`` and
	       ``HHTOTD15`` and binarize the total into ``UTILIZATION``:
	       0.0 when the total is below 10, 1.0 when it is 10 or more.

	    Args:
	        df: pandas DataFrame containing the raw columns referenced above.
	            It is not modified.

	    Returns:
	        A new DataFrame with the filtered rows (original index labels
	        kept). ``RACE`` sits where ``RACEV2X`` was, and ``UTILIZATION``
	        takes the place of ``TOTEXP15`` (any existing ``TOTEXP15`` values
	        are overwritten; the column is appended if it did not exist).

	    Raises:
	        KeyError: if one of the required columns is missing.
	    """
	    # Vectorised form of the per-row race test. Comparisons against NaN
	    # are False, so rows with missing codes are labelled 'Non-White'.
	    non_hispanic_white = (df['HISPANX'] == 2) & (df['RACEV2X'] == 1)
	    race = non_hispanic_white.map({True: 'White', False: 'Non-White'})
	    # assign() builds a new frame, so the caller's DataFrame stays untouched.
	    df = df.assign(RACEV2X=race).rename(columns={'RACEV2X': 'RACE'})

	    df = df[df['PANEL'] == 19]

	    # Strip the panel/round suffixes so downstream code can use generic names.
	    df = df.rename(columns={
	        'FTSTU53X': 'FTSTU', 'ACTDTY53': 'ACTDTY', 'HONRDC53': 'HONRDC',
	        'RTHLTH53': 'RTHLTH', 'MNHLTH53': 'MNHLTH', 'CHBRON53': 'CHBRON',
	        'JTPAIN53': 'JTPAIN', 'PREGNT53': 'PREGNT', 'WLKLIM53': 'WLKLIM',
	        'ACTLIM53': 'ACTLIM', 'SOCLIM53': 'SOCLIM', 'COGLIM53': 'COGLIM',
	        'EMPST53': 'EMPST', 'REGION53': 'REGION', 'MARRY53X': 'MARRY',
	        'AGE53X': 'AGE', 'POVCAT15': 'POVCAT', 'INSCOV15': 'INSCOV',
	    })

	    # For these columns every negative code (-1, -7, -8, -9) is removed.
	    for column in ('REGION', 'AGE', 'MARRY', 'ASTHDX'):
	        df = df[df[column] >= 0]

	    # For the remaining categorical features -1 is kept and only codes
	    # below -1 are removed.
	    categorical = [
	        'FTSTU', 'ACTDTY', 'HONRDC', 'RTHLTH', 'MNHLTH', 'HIBPDX', 'CHDDX',
	        'ANGIDX', 'EDUCYR', 'HIDEG', 'MIDX', 'OHRTDX', 'STRKDX', 'EMPHDX',
	        'CHBRON', 'CHOLDX', 'CANCERDX', 'DIABDX', 'JTPAIN', 'ARTHDX',
	        'ARTHTYPE', 'ASTHDX', 'ADHDADDX', 'PREGNT', 'WLKLIM', 'ACTLIM',
	        'SOCLIM', 'COGLIM', 'DFHEAR42', 'DFSEE42', 'ADSMOK42', 'PHQ242',
	        'EMPST', 'POVCAT', 'INSCOV',
	    ]
	    # axis is passed by keyword: explicit, and it avoids the
	    # positional-argument deprecation in recent pandas releases.
	    df = df[(df[categorical] >= -1).all(axis=1)]

	    # Column-wise addition propagates NaN exactly like the per-row sum did.
	    total = (df['OBTOTV15'] + df['OPTOTV15'] + df['ERTOT15']
	             + df['IPNGTD15'] + df['HHTOTD15'])
	    # 1.0 for >= 10, 0.0 for < 10; a NaN total stays NaN instead of
	    # silently becoming 0.0.
	    utilization = (total >= 10.0).astype(float).where(total.notna())

	    # Written under the TOTEXP15 name first so the label keeps that
	    # column's position, then renamed. assign() also avoids writing into
	    # a filtered slice (SettingWithCopyWarning) and works on empty frames.
	    df = df.assign(TOTEXP15=utilization)
	    return df.rename(columns={'TOTEXP15': 'UTILIZATION'})


	class MEPSDataset19(StandardDataset):
	    """MEPS dataset built from ``h181.csv`` (panel 19 / FY2015 per the
	    module name).

	    See :file:`aif360/data/raw/meps/README.md`.
	    """

	    # NOTE(review): the list defaults below are shared between calls. They
	    # are only forwarded here; confirm StandardDataset does not mutate them.
	    def __init__(self, label_name='UTILIZATION', favorable_classes=[1.0],
	                 protected_attribute_names=['RACE'],
	                 privileged_classes=[['White']],
	                 instance_weights_name='PERWT15F',
	                 categorical_features=['REGION','SEX','MARRY',
	                     'FTSTU','ACTDTY','HONRDC','RTHLTH','MNHLTH','HIBPDX','CHDDX','ANGIDX',
	                     'MIDX','OHRTDX','STRKDX','EMPHDX','CHBRON','CHOLDX','CANCERDX','DIABDX',
	                     'JTPAIN','ARTHDX','ARTHTYPE','ASTHDX','ADHDADDX','PREGNT','WLKLIM',
	                     'ACTLIM','SOCLIM','COGLIM','DFHEAR42','DFSEE42','ADSMOK42',
	                     'PHQ242','EMPST','POVCAT','INSCOV'],
	                 features_to_keep=['REGION','AGE','SEX','RACE','MARRY',
	                     'FTSTU','ACTDTY','HONRDC','RTHLTH','MNHLTH','HIBPDX','CHDDX','ANGIDX',
	                     'MIDX','OHRTDX','STRKDX','EMPHDX','CHBRON','CHOLDX','CANCERDX','DIABDX',
	                     'JTPAIN','ARTHDX','ARTHTYPE','ASTHDX','ADHDADDX','PREGNT','WLKLIM',
	                     'ACTLIM','SOCLIM','COGLIM','DFHEAR42','DFSEE42','ADSMOK42','PCS42',
	                     'MCS42','K6SUM42','PHQ242','EMPST','POVCAT','INSCOV','UTILIZATION','PERWT15F'],
	                 features_to_drop=[],
	                 na_values=[], custom_preprocessing=default_preprocessing,
	                 metadata=default_mappings):
	        """Read ``h181.csv`` and initialise the parent ``StandardDataset``.

	        The CSV is looked up in ``../data/raw/meps`` relative to this
	        module's directory. If it cannot be read (``IOError``), download
	        instructions are printed and the process exits with status 1.

	        ``na_values`` is passed both to ``pandas.read_csv`` and to the
	        parent constructor. Every other argument is forwarded unchanged to
	        ``StandardDataset.__init__``; see that class for their meaning.
	        """
	        module_path = os.path.abspath(__file__)
	        csv_path = os.path.join(os.path.dirname(module_path),
	                                '..', 'data', 'raw', 'meps', 'h181.csv')

	        try:
	            df = pd.read_csv(csv_path, sep=',', na_values=na_values)
	        except IOError as err:
	            import sys

	            # Both help paths are resolved from the module file itself,
	            # hence two '..' hops instead of one.
	            readme_path = os.path.abspath(os.path.join(
	                module_path, '..', '..', 'data', 'raw', 'meps', 'README.md'))
	            target_dir = os.path.abspath(os.path.join(
	                module_path, '..', '..', 'data', 'raw', 'meps'))

	            print("IOError: {}".format(err))
	            print("To use this class, please follow the instructions in:")
	            print("\n\t{}\n".format(readme_path))
	            print("\n to download and convert the 2015 data and place the final h181.csv file, as-is, in the folder:")
	            print("\n\t{}\n".format(target_dir))
	            sys.exit(1)

	        super(MEPSDataset19, self).__init__(
	            df=df,
	            label_name=label_name,
	            favorable_classes=favorable_classes,
	            protected_attribute_names=protected_attribute_names,
	            privileged_classes=privileged_classes,
	            instance_weights_name=instance_weights_name,
	            categorical_features=categorical_features,
	            features_to_keep=features_to_keep,
	            features_to_drop=features_to_drop,
	            na_values=na_values,
	            custom_preprocessing=custom_preprocessing,
	            metadata=metadata,
	        )