Spaces:

erasmopurif
/

FairUP

Runtime error

App Files Files Community

FairUP / src /aif360 /datasets /compas_dataset.py

erasmopurif

First commit

d2a8669 almost 2 years ago

raw

history blame contribute delete

3.87 kB

	import os

	import pandas as pd

	from aif360.datasets import StandardDataset


	# Human-readable names for the numeric codes used in this dataset.
	# 'label_maps' holds one mapping for the label column;
	# 'protected_attribute_maps' holds one mapping per protected attribute,
	# listed in the same order as the default protected_attribute_names of
	# CompasDataset (sex first, then race).
	default_mappings = dict(
	    label_maps=[
	        {1.0: 'Did recid.', 0.0: 'No recid.'},
	    ],
	    protected_attribute_maps=[
	        {0.0: 'Male', 1.0: 'Female'},
	        {1.0: 'Caucasian', 0.0: 'Not Caucasian'},
	    ],
	)

	def default_preprocessing(df):
	    """Filter rows the same way as the original ProPublica analysis.

	    Reference notebook:
	    https://github.com/propublica/compas-analysis/blob/master/Compas%20Analysis.ipynb

	    A row is kept only when all of the following hold:

	    * ``days_b_screening_arrest`` is between -30 and 30 (inclusive),
	    * ``is_recid`` is not -1,
	    * ``c_charge_degree`` is not ``'O'``,
	    * ``score_text`` is not ``'N/A'``.

	    Args:
	        df (pandas.DataFrame): frame exposing the four columns above.

	    Returns:
	        pandas.DataFrame: the rows of ``df`` that pass every filter.
	    """
	    screening_gap = df.days_b_screening_arrest
	    within_window = (screening_gap <= 30) & (screening_gap >= -30)
	    recid_known = df.is_recid != -1
	    charge_not_o = df.c_charge_degree != 'O'
	    has_score_text = df.score_text != 'N/A'

	    keep = within_window & recid_known & charge_not_o & has_score_text
	    return df[keep]

	class CompasDataset(StandardDataset):
	    """ProPublica COMPAS Dataset.

	    The raw CSV is read from ``../data/raw/compas/`` relative to this module.
	    See :file:`aif360/data/raw/compas/README.md`.
	    """

	    def __init__(self, label_name='two_year_recid', favorable_classes=[0],
	                 protected_attribute_names=['sex', 'race'],
	                 privileged_classes=[['Female'], ['Caucasian']],
	                 instance_weights_name=None,
	                 categorical_features=['age_cat', 'c_charge_degree',
	                                       'c_charge_desc'],
	                 features_to_keep=['sex', 'age', 'age_cat', 'race',
	                                   'juv_fel_count', 'juv_misd_count',
	                                   'juv_other_count', 'priors_count',
	                                   'c_charge_degree', 'c_charge_desc',
	                                   'two_year_recid'],
	                 features_to_drop=[], na_values=[],
	                 custom_preprocessing=default_preprocessing,
	                 metadata=default_mappings):
	        """Load the COMPAS CSV and hand it to :obj:`StandardDataset`.

	        See :obj:`StandardDataset` for a description of the arguments; every
	        argument is forwarded unchanged to the parent constructor, and
	        ``na_values`` is additionally passed to ``pandas.read_csv``.

	        Note: The label value 0 in this case is considered favorable (no
	        recidivism).

	        If the CSV file cannot be read (``IOError``), download instructions
	        are printed and the process is terminated with ``sys.exit(1)``.

	        Examples:
	            In some cases, it may be useful to keep track of a mapping from
	            `float -> str` for protected attributes and/or labels. If our use
	            case differs from the default, we can modify the mapping stored
	            in `metadata` (using the same keys as ``default_mappings``):

	            >>> label_map = {1.0: 'Did recid.', 0.0: 'No recid.'}
	            >>> protected_attribute_maps = [{1.0: 'Male', 0.0: 'Female'}]
	            >>> cd = CompasDataset(protected_attribute_names=['sex'],
	            ... privileged_classes=[['Male']], metadata={'label_maps': [label_map],
	            ... 'protected_attribute_maps': protected_attribute_maps})

	            Now this information will stay attached to the dataset and can be
	            used for more descriptive visualizations.
	        """
	        # NOTE(review): the list/dict defaults above are shared objects; this
	        # constructor only forwards them and does not mutate them itself.

	        # Locate the raw data folder relative to this source file.
	        module_dir = os.path.dirname(os.path.abspath(__file__))
	        data_dir = os.path.join(module_dir, '..', 'data', 'raw', 'compas')
	        filepath = os.path.join(data_dir, 'compas-scores-two-years.csv')

	        try:
	            df = pd.read_csv(filepath, index_col='id', na_values=na_values)
	        except IOError as err:
	            import sys

	            # Tell the user where to get the file and where to put it,
	            # then abort: the dataset cannot be built without the CSV.
	            help_lines = (
	                "IOError: {}".format(err),
	                "To use this class, please download the following file:",
	                "\n\thttps://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv",
	                "\nand place it, as-is, in the folder:",
	                "\n\t{}\n".format(os.path.abspath(data_dir)),
	            )
	            for line in help_lines:
	                print(line)
	            sys.exit(1)

	        parent_kwargs = dict(
	            df=df,
	            label_name=label_name,
	            favorable_classes=favorable_classes,
	            protected_attribute_names=protected_attribute_names,
	            privileged_classes=privileged_classes,
	            instance_weights_name=instance_weights_name,
	            categorical_features=categorical_features,
	            features_to_keep=features_to_keep,
	            features_to_drop=features_to_drop,
	            na_values=na_values,
	            custom_preprocessing=custom_preprocessing,
	            metadata=metadata,
	        )
	        super(CompasDataset, self).__init__(**parent_kwargs)