process_mining / pm4py /examples /llm /03_case_procedural.py
linpershey's picture
Add 'pm4py/' from commit '80970016c5e1e79af7c37df0dd88e17587fe7bcf'
b4ba3ec
raw
history blame
2.76 kB
import pm4py
import os
import duckdb
def execute_script():
    """
    Score the case-selection SQL query from example 02_... against the log.

    The renting event log is loaded, the SQL query below is run on it through
    DuckDB, and every case returned by the query is treated as predicted
    "positive"; all remaining cases are predicted "negative". The prediction
    is then compared with the boolean value of the log's protected attribute
    (the first column whose name contains "protected"), and the four cells of
    the resulting confusion matrix are printed.
    """
    case_key = "case:concept:name"

    # NOTE: the local name ``dataframe`` must not change -- the SQL text refers
    # to it (``FROM dataframe``) and DuckDB resolves that table name against
    # the Python variable of the same name.
    dataframe = pm4py.read_xes("../../tests/input_data/fairness/renting_log_high.xes.gz")

    # Ground-truth column; raises IndexError when no column name contains
    # "protected".
    protected_col = [col for col in dataframe.columns if "protected" in col][0]

    # Query under evaluation, reproduced verbatim.
    # NOTE(review): STRING_AGG is used as a window function with ORDER BY, so
    # ``variant`` looks like a running prefix per event rather than a single
    # value per case; a case would then match as soon as any of its prefixes
    # equals a listed variant -- confirm this is what the query intends.
    # NOTE(review): the final join to ``cases`` appears to repeat each event
    # once per row of its case in ``cases``; only the case identifiers and the
    # first row per case are used below.
    sql_query = """
WITH cases AS (
SELECT
"case:concept:name",
STRING_AGG("concept:name", ' -> ') OVER (PARTITION BY "case:concept:name" ORDER BY "time:timestamp") AS variant
FROM
dataframe
),
filtered_cases AS (
SELECT
"case:concept:name"
FROM
cases
WHERE variant IN (
'Request Appointment -> Set Appointment -> Hand In Credit Appliaction -> Verify Borrowers Information -> Submit File to Underwriter -> Loan Denied',
'Request Appointment -> Set Appointment -> Hand In Credit Appliaction -> Verify Borrowers Information -> Application Rejected',
'Request Appointment -> Appointment Denied',
'Request Appointment -> Set Appointment -> Hand In Credit Appliaction -> Verify Borrowers Information -> Request Co-Signer On Loan -> Submit File to Underwriter -> Loan Denied',
'Request Appointment -> Set Appointment -> Hand In Credit Appliaction -> Verify Borrowers Information -> Make Visit to Assess Colatteral -> Submit File to Underwriter -> Loan Denied',
'Request Appointment -> Set Appointment -> Hand In Credit Appliaction -> Verify Borrowers Information -> Make Visit to Assess Colatteral -> Submit File to Underwriter -> Sign Loan Agreement'
)
GROUP BY
"case:concept:name"
)
SELECT
df.*,
cases.variant
FROM
dataframe AS df
JOIN
filtered_cases ON df."case:concept:name" = filtered_cases."case:concept:name"
JOIN
cases ON df."case:concept:name" = cases."case:concept:name"
"""

    def count_cases(per_case, flag):
        # Number of rows (one per case) whose protected attribute equals flag.
        return len(per_case.loc[per_case[protected_col] == flag])

    # Events of the cases selected by the query (predicted positive).
    selected_events = duckdb.sql(sql_query).to_df()
    selected_case_ids = selected_events[case_key].unique()

    # Events of every case the query did not return (predicted negative).
    is_selected = dataframe[case_key].isin(selected_case_ids)
    remaining_events = dataframe.loc[~is_selected]

    # Collapse to one row per case: first values for the selected cases,
    # last values for the remaining ones.
    selected_per_case = selected_events.groupby(case_key).first()
    remaining_per_case = remaining_events.groupby(case_key).last()

    tp, fp = count_cases(selected_per_case, True), count_cases(selected_per_case, False)
    print("true positives", tp)
    print("false positives", fp)

    fn, tn = count_cases(remaining_per_case, True), count_cases(remaining_per_case, False)
    print("false negatives", fn)
    print("true negatives", tn)
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    execute_script()