# AML / utils.py
import streamlit.components.v1 as components
from random import randrange, uniform
import pandas as pd
import joblib
import dill
from omnixai.data.tabular import Tabular
def ChangeButtonColour(widget_label, font_color, background_color='transparent'):
    # Streamlit workaround: inject a tiny script into the parent page that finds the
    # button whose visible text matches `widget_label` and restyles it in place.
    htmlstr = f"""
        <script>
            var elements = window.parent.document.querySelectorAll('button');
            for (var i = 0; i < elements.length; ++i) {{
                if (elements[i].innerText == '{widget_label}') {{
                    elements[i].style.color = '{font_color}';
                    elements[i].style.background = '{background_color}';
                }}
            }}
        </script>
        """
    components.html(htmlstr, height=0, width=0)
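
# Illustrative call from the app code (the button label here is an assumption,
# not taken from this repo):
#   ChangeButtonColour("Confirm", font_color="white", background_color="green")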
def first_five_posneg_indices(response_body):
    values = response_body["explanations"][0]["shap_values"]
    # Separate positive and negative SHAP values, keeping each feature's index as the dict key
    positive_dict = {index: val for index, val in enumerate(values) if val > 0}
    negative_dict = {index: val for index, val in enumerate(values) if val < 0}
    # Sort the indices by the magnitude of their values, largest first
    sorted_positive_indices = [index for index, _ in sorted(positive_dict.items(), key=lambda item: abs(item[1]), reverse=True)]
    sorted_negative_indices = [index for index, _ in sorted(negative_dict.items(), key=lambda item: abs(item[1]), reverse=True)]
    # Return the (up to) five most influential positive and negative feature indices
    return [sorted_positive_indices[:5], sorted_negative_indices[:5]]
def get_texts(posneg_indices, feature_texts):
positive_texts = [feature_texts[x] for x in posneg_indices[0]]
negative_texts = [feature_texts[x] for x in posneg_indices[1]]
return positive_texts, negative_texts
def get_input_values(posneg_indices, data_instance):
    values = data_instance["instances"][0]
    outputs = []
    for lst in posneg_indices:
        vals = []
        for idx in lst:
            # Features 7-11 are boolean flags; render them as "True"/"False"
            if idx in range(7, 12):
                val = str(bool(values[idx])).capitalize()
            else:
                val = values[idx]
            vals.append(val)
        outputs.append(vals)
    return outputs[0], outputs[1]
response = {
"predictions": [
True
],
"explanations": [
{
"shap_values": [
-0.020634920634920784,
-0.053968253968253166,
-0.0015873015873012486,
0,
0.04603174603174587,
-0.12063492063492065,
0.8365079365079348,
-0.16349206349206302,
0.12222222222222279,
-0.04444444444444462,
-0.02444444444444462,
0.03603174603174587,
],
"expected_value": 0.4
}
]
}
feature_texts = {0: "Day out of 30", 1: "Type of transaction: ", 2: "Amount transferred: ", 3: "Initial balance of sender: ", 4: "New balance of sender: ",
5: "Initial balance of recipient: ", 6: "New balance of recipient: ", 7: "Sender's balance was exactly credited: ",
8: "Receiver's balance was exactly credited: ", 9: "Transaction over 450.000: ", 10: "Frequent receiver of transactions: ", 11: "Receiver is merchant: ", 12: "Sender ID: ", 13: "Receiver ID: "}
example_input = {"instances":[[1,"PAYMENT",9839.64,170136,160296.36,0,0,1,1,0,0,1,84,2424]]}
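
# Illustrative chaining of the SHAP helpers above using the sample `response`,
# `feature_texts` and `example_input` defined in this module (a sketch, not app code):
#   top_indices = first_five_posneg_indices(response)
#   pos_texts, neg_texts = get_texts(top_indices, feature_texts)
#   pos_values, neg_values = get_input_values(top_indices, example_input)
#   # e.g. pos_texts[0] + str(pos_values[0]) -> "New balance of recipient: 0"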
def get_fake_certainty():
# Generate a random certainty between 75% and 99%
fake_certainty = uniform(0.75, 0.99)
formatted_fake_certainty = "{:.2%}".format(fake_certainty)
return formatted_fake_certainty
def get_random_suspicious_transaction():
    data = pd.read_pickle("data/preprocessed_data.pkl")
    suspicious_data = data[data["isFraud"] == 1]
    max_n = len(suspicious_data)
    random_nr = randrange(max_n)
    # Pick one random fraudulent row by position and drop the label column
    suspicious_transaction = suspicious_data.iloc[[random_nr]].drop("isFraud", axis=1)
    return suspicious_transaction
def get_model_certainty(data_point):
    # Load the trained model
    model = joblib.load('model/model.joblib')
    # Load the fitted feature transformer
    with open("transformer/transformer.dill", "rb") as f:
        transformer = dill.load(f)
    # Wrap the data point in a Tabular object and apply the transformation
    sample = Tabular(data_point, categorical_columns=[1])
    transformed = transformer.transform(sample)
    # Get the fraud (positive-class) probability and format it as a readable percentage
    probability = model.predict_proba(transformed)
    positive_class_probability = probability[:, 1][0]
    formatted_probability = "{:.5%}".format(positive_class_probability)
    return formatted_probability
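
if __name__ == "__main__":
    # Smoke-test sketch: assumes data/preprocessed_data.pkl, model/model.joblib and
    # transformer/transformer.dill exist as referenced by the helpers above.
    transaction = get_random_suspicious_transaction()
    print("Sampled suspicious transaction:")
    print(transaction)
    print("Model certainty:", get_model_certainty(transaction))
    print("Fake certainty:", get_fake_certainty())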