context("Comprehensive Test for Classification Metric")

# End-to-end check of the metrics exposed by classification_metric(), using
# the actual/predicted CSV fixtures read from the package's extdata folder.
#
# Expected rates are written as exact fractions of the fixture's confusion
# counts. The counts below are the ones pinned by the num_* expectations and
# by the exactly representable rates (0.5, 0.125, 0.4, ...) in this test:
#
#                  TP  FN  FP  TN    n
#   all rows        1   3   6  10   20
#   privileged      1   1   2   7   11   (sex == 1)
#   unprivileged    0   2   4   3    9   (sex == 0)
#
# "*_difference" / "*_ratio" expectations are unprivileged minus / divided by
# privileged, matching the values this test has always expected.
#
# expect_equal() interprets `tolerance` as a *relative* difference, so rounded
# literals paired with hand-tuned tolerances are avoided: fractions are
# compared at the default tolerance, and only the entropy-based indices (kept
# as six-significant-digit decimals) use an explicit one.
test_that("running dataset test", {
  # Both fixtures are loaded with identical settings; only the file differs.
  load_fixture <- function(file_name) {
    aif360::binary_label_dataset(
      data_path = system.file("extdata", file_name, package = "aif360"),
      favor_label = 1,
      unfavor_label = 0,
      unprivileged_protected_attribute = 0,
      privileged_protected_attribute = 1,
      target_column = "income",
      protected_attribute = "sex"
    )
  }

  act <- load_fixture("actual_data.csv")
  pred <- load_fixture("predicted_data.csv")

  cm <- classification_metric(
    act, pred,
    unprivileged_groups = list("sex", 0),
    privileged_groups = list("sex", 1)
  )

  # Relative tolerance for the entropy-based indices, whose expected values
  # are decimal literals rather than exact fractions.
  index_tol <- 1e-4

  # Group gaps (unprivileged - privileged) reused by several expectations.
  fpr_diff <- 4 / 7 - 2 / 9
  tpr_diff <- 0 - 1 / 2

  # Accuracy ----
  expect_equal(cm$accuracy(), 11 / 20)
  expect_equal(cm$accuracy(privileged = TRUE), 8 / 11)
  expect_equal(cm$accuracy(privileged = FALSE), 3 / 9)

  # Odds differences ----
  expect_equal(
    cm$average_abs_odds_difference(),
    (abs(fpr_diff) + abs(tpr_diff)) / 2
  )
  expect_equal(cm$average_odds_difference(), (fpr_diff + tpr_diff) / 2)

  expect_type(cm$binary_confusion_matrix(), "list")

  # Between-group inequality indices ----
  # NOTE(review): the coefficient-of-variation expectations were already
  # disabled in this file; the reason is not recorded here - TODO confirm
  # before re-enabling.
  # expect_equal(cm$between_all_groups_coefficient_of_variation(), 0.0568, tolerance=3.65e-05)
  expect_equal(
    cm$between_all_groups_generalized_entropy_index(alpha = 2),
    0.00161349,
    tolerance = index_tol
  )
  expect_equal(
    cm$between_all_groups_theil_index(),
    0.00160824,
    tolerance = index_tol
  )
  # expect_equal(cm$coefficient_of_variation(), 0.5685, tolerance=0.00105)

  # Selection-rate ratio and true-positive-rate gap ----
  expect_equal(cm$disparate_impact(), (4 / 9) / (3 / 11))
  expect_equal(cm$equal_opportunity_difference(), tpr_diff)

  # Error rate ----
  expect_equal(cm$error_rate(), 9 / 20)
  expect_equal(cm$error_rate(privileged = TRUE), 3 / 11)
  expect_equal(cm$error_rate(privileged = FALSE), 6 / 9)
  expect_equal(cm$error_rate_difference(), 6 / 9 - 3 / 11)
  expect_equal(cm$error_rate_ratio(), (6 / 9) / (3 / 11))

  # False discovery rate: FP / (TP + FP) ----
  expect_equal(cm$false_discovery_rate(), 6 / 7)
  expect_equal(cm$false_discovery_rate(privileged = TRUE), 2 / 3)
  expect_equal(cm$false_discovery_rate(privileged = FALSE), 1)
  expect_equal(cm$false_discovery_rate_difference(), 1 - 2 / 3)
  expect_equal(cm$false_discovery_rate_ratio(), 1.5)

  # False negative rate: FN / (TP + FN) ----
  expect_equal(cm$false_negative_rate(), 0.75)
  expect_equal(cm$false_negative_rate(privileged = TRUE), 0.5)
  expect_equal(cm$false_negative_rate(privileged = FALSE), 1)
  expect_equal(cm$false_negative_rate_difference(), 0.5)
  expect_equal(cm$false_negative_rate_ratio(), 2)

  # False omission rate: FN / (FN + TN) ----
  expect_equal(cm$false_omission_rate(), 3 / 13)
  expect_equal(cm$false_omission_rate(privileged = TRUE), 0.125)
  expect_equal(cm$false_omission_rate(privileged = FALSE), 0.4)
  expect_equal(cm$false_omission_rate_difference(), 0.275)
  expect_equal(cm$false_omission_rate_ratio(), 3.2)

  # False positive rate: FP / (FP + TN) ----
  expect_equal(cm$false_positive_rate(), 0.375)
  expect_equal(cm$false_positive_rate(privileged = TRUE), 2 / 9)
  expect_equal(cm$false_positive_rate(privileged = FALSE), 4 / 7)
  expect_equal(cm$false_positive_rate_difference(), fpr_diff)

  # Generalized (score-based) variants ----
  # The expected values are the same as for the label-based rates above.
  expect_type(cm$generalized_binary_confusion_matrix(), "list")
  expect_equal(
    cm$generalized_entropy_index(alpha = 2),
    0.161626,
    tolerance = index_tol
  )
  expect_equal(cm$generalized_false_negative_rate(), 0.75)
  expect_equal(cm$generalized_false_negative_rate(privileged = TRUE), 0.5)
  expect_equal(cm$generalized_false_negative_rate(privileged = FALSE), 1)
  expect_equal(cm$generalized_false_positive_rate(), 0.375)
  expect_equal(cm$generalized_false_positive_rate(privileged = TRUE), 2 / 9)
  expect_equal(cm$generalized_false_positive_rate(privileged = FALSE), 4 / 7)
  expect_equal(cm$generalized_true_negative_rate(), 0.625)
  expect_equal(cm$generalized_true_negative_rate(privileged = TRUE), 7 / 9)
  expect_equal(cm$generalized_true_negative_rate(privileged = FALSE), 3 / 7)
  expect_equal(cm$generalized_true_positive_rate(), 0.25)
  expect_equal(cm$generalized_true_positive_rate(privileged = TRUE), 0.5)
  expect_equal(cm$generalized_true_positive_rate(privileged = FALSE), 0)

  # Predictive values and raw counts ----
  expect_equal(cm$negative_predictive_value(), 10 / 13)
  expect_equal(cm$num_false_negatives(), 3.0)
  expect_equal(cm$num_false_positives(), 6.0)
  expect_equal(cm$num_generalized_false_negatives(), 3.0)
  expect_equal(cm$num_generalized_false_positives(), 6.0)
  expect_equal(cm$num_generalized_true_negatives(), 10.0)
  expect_equal(cm$num_generalized_true_positives(), 1.0)
  expect_type(cm$performance_measures(), "list")
  expect_equal(cm$positive_predictive_value(), 1 / 7)
  expect_equal(cm$power(), 1)
  expect_equal(cm$precision(), 1 / 7)
  expect_equal(cm$recall(), 0.25)
  expect_equal(cm$selection_rate(), 7 / 20)
  expect_equal(cm$specificity(), 0.625)
  expect_equal(cm$sensitivity(), 0.25)

  # Remaining group-fairness and inequality metrics ----
  expect_equal(cm$statistical_parity_difference(), 4 / 9 - 3 / 11)
  expect_equal(cm$theil_index(), 0.221880, tolerance = index_tol)
  expect_equal(cm$true_negative_rate(), 0.625)
  expect_equal(cm$true_positive_rate(), 0.25)
  expect_equal(cm$true_positive_rate_difference(), tpr_diff)
})