adollbo committed on
Commit
c8a77b2
·
1 Parent(s): 0df0d2a

added main data manipulation functions

Browse files
Files changed (1) hide show
  1. utils.py +99 -96
utils.py CHANGED
@@ -1,88 +1,76 @@
1
  import streamlit.components.v1 as components
 
2
  from random import randrange, uniform
3
  import pandas as pd
4
  import joblib
5
  import dill
6
- from omnixai.data.tabular import Tabular
7
-
8
def ChangeButtonColour(widget_label, font_color, background_color='transparent'):
    """Recolor every Streamlit button whose visible label equals widget_label.

    Streamlit has no per-button styling hook, so this injects a zero-size
    HTML component whose script walks the parent page's <button> elements
    and restyles the ones whose innerText matches exactly.

    Args:
        widget_label: Exact button text to match (compared with ==).
        font_color: CSS color applied to the button text.
        background_color: CSS background value, defaults to 'transparent'.
    """
    htmlstr = f"""
        <script>
            var elements = window.parent.document.querySelectorAll('button');
            for (var i = 0; i < elements.length; ++i) {{
                if (elements[i].innerText == '{widget_label}') {{
                    elements[i].style.color ='{font_color}';
                    elements[i].style.background = '{background_color}'
                }}
            }}
        </script>
        """
    # htmlstr is already a str; the original f"{htmlstr}" wrapper was redundant.
    components.html(htmlstr, height=0, width=0)
21
-
22
-
23
def first_five_posneg_indices(response_body, top_n=5):
    """Return indices of the strongest positive and negative SHAP values.

    Args:
        response_body: Response dict exposing
            response_body["explanations"][0]["shap_values"] as a flat list.
        top_n: How many indices to keep per sign. Defaults to 5, preserving
            the behaviour the function is named after.

    Returns:
        [positive_indices, negative_indices], each ordered by descending
        magnitude and truncated to top_n. Zero-valued features appear in
        neither list.
    """
    values = response_body["explanations"][0]["shap_values"]

    # Separate positive and negative values, keep indice as corresponds to key
    positive_dict = {index: val for index, val in enumerate(values) if val > 0}
    negative_dict = {index: val for index, val in enumerate(values) if val < 0}

    # Sort dictionaries based on the magnitude of values
    sorted_positive_indices = [index for index, _ in sorted(positive_dict.items(), key=lambda item: abs(item[1]), reverse=True)]
    sorted_negative_indices = [index for index, _ in sorted(negative_dict.items(), key=lambda item: abs(item[1]), reverse=True)]

    return [sorted_positive_indices[:top_n], sorted_negative_indices[:top_n]]
34
-
35
def get_texts(posneg_indices, feature_texts):
    """Map [positive_indices, negative_indices] to their feature descriptions.

    Returns a (positive_texts, negative_texts) pair of lists, preserving
    the incoming index order.
    """
    pos_indices, neg_indices = posneg_indices[0], posneg_indices[1]
    return (
        [feature_texts[i] for i in pos_indices],
        [feature_texts[i] for i in neg_indices],
    )
39
-
40
-
41
def get_input_values(posneg_indices, data_instance):
    """Look up the raw input value for each explained feature index.

    Columns 7..11 of the instance hold 0/1 flags and are rendered as the
    strings 'True'/'False'; every other column is returned unchanged.

    Returns the value lists for the first and second index group.
    """
    row = data_instance["instances"][0]

    def render(idx):
        # Flag columns become capitalized booleans for display.
        if 7 <= idx < 12:
            return str(bool(row[idx])).capitalize()
        return row[idx]

    outputs = [[render(idx) for idx in group] for group in posneg_indices]
    return outputs[0], outputs[1]
54
-
55
# Example explain-endpoint response for local development: one prediction
# plus per-feature SHAP values and the explainer's expected value.
response = {
    "predictions": [
        True
    ],
    "explanations": [
        {
            "shap_values": [
                -0.020634920634920784,
                -0.053968253968253166,
                -0.0015873015873012486,
                0,
                0.04603174603174587,
                -0.12063492063492065,
                0.8365079365079348,
                -0.16349206349206302,
                0.12222222222222279,
                -0.04444444444444462,
                -0.02444444444444462,
                0.03603174603174587,
            ],
            "expected_value": 0.4
        }
    ]
}

# Maps a feature's position in the raw input row to a display-label prefix;
# positions 7-11 are 0/1 flags (see get_input_values).
feature_texts = {0: "Day out of 30", 1: "Type of transaction: ", 2: "Amount transferred: ", 3: "Initial balance of sender: ", 4: "New balance of sender: ",
                 5: "Initial balance of recipient: ", 6: "New balance of recipient: ", 7: "Sender's balance was exactly credited: ",
                 8: "Receiver's balance was exactly credited: ", 9: "Transaction over 450.000: ", 10: "Frequent receiver of transactions: ", 11: "Receiver is merchant: ", 12: "Sender ID: ", 13: "Receiver ID: "}

# Sample raw model input: a single 14-column transaction row.
example_input = {"instances":[[1,"PAYMENT",9839.64,170136,160296.36,0,0,1,1,0,0,1,84,2424]]}
86
 
87
  def get_fake_certainty():
88
  # Generate a random certainty between 75% and 99%
@@ -91,8 +79,7 @@ def get_fake_certainty():
91
  return formatted_fake_certainty
92
 
93
 
94
- def get_random_suspicious_transaction():
95
- data = pd.read_pickle("data/preprocessed_data.pkl")
96
  suspicious_data=data[data["isFraud"]==1]
97
  max_n=len(suspicious_data)
98
  random_nr=randrange(max_n)
@@ -100,20 +87,36 @@ def get_random_suspicious_transaction():
100
  return suspicous_transaction
101
 
102
 
103
def get_model_certainty(data_point):
    """Return the model's positive-class probability for one datapoint,
    formatted as a percentage string (e.g. '97.12345%').

    Loads the persisted classifier and its feature transformer from disk on
    every call — acceptable for a demo, but repeated calls re-read both files.
    """
    # load the trained model
    model = joblib.load('model/model.joblib')
    # load transformer
    with open("transformer/transformer.dill", "rb") as f:
        transformer = dill.load(f)

    # create tabular object of datapoint and apply transformation
    # NOTE(review): categorical_columns=[1] assumes the transaction-type
    # column sits at position 1 — confirm against the caller's row layout.
    sample = Tabular(data_point, categorical_columns=[1])
    transformed = transformer.transform(sample)

    # get model certainty for sample and transform it to smth intelligble
    probability = model.predict_proba(transformed)
    positive_class_probability = probability[:, 1][0]
    formatted_probability = "{:.5%}".format(positive_class_probability)
    # Return the formatted probability
    return formatted_probability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit.components.v1 as components
2
+ import streamlit as st
3
  from random import randrange, uniform
4
  import pandas as pd
5
  import joblib
6
  import dill
7
+ import logging
8
+ import numpy as np
9
+
10
# Display names for the 14 raw transaction columns, in row order.
COL_NAMES = ['Time step',
             'Transaction type',
             'Amount transferred',
             'Sender\'s initial balance',
             'Sender\'s new balance',
             'Recipient\'s initial balance',
             'Recipient\'s new balance',
             "Sender exactly credited",
             "Receiver exactly credited",
             'Amount > 450 000',
             'Frequent receiver',
             'Merchant receiver',
             'Sender ID',
             'Receiver ID']

# Labels keyed by feature position AFTER transformation() replaces the
# transaction-type column with five one-hot columns: 0-12 are the remaining
# raw columns, 13-17 the one-hot transaction types.
feature_texts = {0: "Time step: ", 1: "Amount transferred: ", 2: "Initial balance of sender: ", 3: "New balance of sender: ",
                 4: "Initial balance of recipient: ", 5: "New balance of recipient: ", 6: "Sender's balance was exactly credited: ",
                 7: "Receiver's balance was exactly credited: ", 8: "Transaction over 450.000: ", 9: "Frequent receiver of transactions: ", 10: "Receiver is merchant: ", 11: "Sender ID: ", 12: "Receiver ID: ",
                 13: "Transaction type is Cash out", 14: "Transaction type is Transfer", 15: "Transaction type is Payment", 16: "Transaction type is Cash in", 17: "Transaction type is Debit"}

# One-hot encoding order for the transaction-type category (see transformation).
CATEGORIES = np.array(['CASH_OUT', 'TRANSFER', 'PAYMENT', 'CASH_IN', 'DEBIT'])
31
+
32
def transformation(input, categories):
    """One-hot encode the transaction-type entry of a raw input row.

    NOTE: mutates `input` in place — the category value at position 1 is
    removed and five 0/1 indicators (one per entry of `categories`) are
    appended. The same (mutated) list object is also returned. An unknown
    category yields all-zero indicators.
    """
    selected = np.array(input[1])
    # Drop the categorical column, then append its one-hot encoding.
    del input[1]
    indicators = np.zeros(5, dtype=int)
    indicators[np.where(categories == selected)[0]] = 1
    input.extend(indicators.tolist())
    return input
41
+
42
def get_request_body(datapoint):
    """Build a Deeploy-style request body from a one-row DataFrame.

    NumPy integer scalars are downcast to plain Python ints so the payload
    is JSON-serializable; all other values pass through unchanged.
    """
    raw_values = datapoint.iloc[0].tolist()
    converted = []
    for value in raw_values:
        if isinstance(value, (np.int32, np.int64)):
            converted.append(int(value))
        else:
            converted.append(value)
    return {'instances': [converted]}
47
+
48
+
49
def get_explainability_texts(shap_values, feature_texts):
    """Select the features that push the prediction toward the positive class.

    Returns:
        (texts, indices) — texts are the feature_texts entries for positive
        SHAP values ordered by descending magnitude (at most 8 kept);
        indices is the full ordered index list, untruncated.
    """
    # Keep only positively-contributing features, remembering their positions.
    contributions = [(i, v) for i, v in enumerate(shap_values) if v > 0]
    # Largest-magnitude contributions first.
    contributions.sort(key=lambda pair: abs(pair[1]), reverse=True)
    sorted_positive_indices = [i for i, _ in contributions]
    positive_texts = [feature_texts[i] for i in sorted_positive_indices[:8]]
    return positive_texts, sorted_positive_indices
58
+
59
+
60
def get_explainability_values(pos_indices, datapoint):
    """Look up the displayed input value for each explained feature index.

    Args:
        pos_indices: Feature indices in the post-one-hot layout, ordered by
            SHAP magnitude (as produced by get_explainability_texts).
        datapoint: One-row DataFrame holding the raw transaction.

    Returns:
        At most 8 values; flag columns are rendered as 'True'/'False'.
    """
    data = datapoint.iloc[0].tolist()
    # transformation() rewrites the row into the model's one-hot layout so
    # that pos_indices (which refer to that layout) address the right slots.
    # (Indexing transformed_data directly — the original indexed `data`,
    # which only worked because transformation mutates its argument.)
    transformed_data = transformation(input=data, categories=CATEGORIES)
    vals = []
    for idx in pos_indices:
        # Indices 7-10 and 13-17 hold 0/1 flags; show them as booleans.
        # NOTE(review): index 6 ("Sender's balance was exactly credited" per
        # feature_texts) also looks like a flag — confirm whether the range
        # should start at 6.
        if idx in range(7, 11) or idx in range(13, 18):
            val = str(bool(transformed_data[idx])).capitalize()
        else:
            val = transformed_data[idx]
        vals.append(val)
    if len(vals) > 8:
        vals = vals[:8]
    return vals
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  def get_fake_certainty():
76
  # Generate a random certainty between 75% and 99%
 
79
  return formatted_fake_certainty
80
 
81
 
82
+ def get_random_suspicious_transaction(data):
 
83
  suspicious_data=data[data["isFraud"]==1]
84
  max_n=len(suspicious_data)
85
  random_nr=randrange(max_n)
 
87
  return suspicous_transaction
88
 
89
 
90
def send_evaluation(client, deployment_id, request_log_id, prediction_log_id, evaluation_input):
    """Send evaluation (user feedback on a prediction) to Deeploy.

    Shows a Streamlit spinner while the client call is in flight.

    Returns:
        True when the evaluation was accepted; False when the call raised
        (the error is logged and surfaced in the Streamlit UI).
    """
    try:
        with st.spinner("Submitting response..."):
            # Forward the feedback for the logged request/prediction pair.
            client.evaluate(deployment_id, request_log_id, prediction_log_id, evaluation_input)
            return True
    except Exception as e:
        logging.error(e)
        st.error(
            # Trailing spaces matter: the fragments are concatenated into one message.
            "Failed to submit feedback. "
            + "Check whether you are using the right model URL and Token. "
            + "Contact Deeploy if the problem persists."
        )
        st.write(f"Error message: {e}")
        return False
105
+
106
+
107
def get_model_url():
    """Render the model-URL text area and parse ids out of the entered URL.

    Returns:
        (model_url, workspace_id, deployment_id); the ids are empty strings
        when the URL does not contain enough path segments.
    """
    model_url = st.text_area(
        "Model URL (default is the demo deployment)",
        "https://api.app.deeploy.ml/workspaces/708b5808-27af-461a-8ee5-80add68384c7/deployments/ac56dbdf-ba04-462f-aa70-5a0d18698e42/",
        height=125,
    )
    # Expected layout: https://host/workspaces/<workspace_id>/deployments/<deployment_id>/
    segments = model_url.split("/")
    workspace_id, deployment_id = "", ""
    if len(segments) > 6:
        workspace_id = segments[4]
        deployment_id = segments[6]
    return model_url, workspace_id, deployment_id
122
+