adollbo committed on
Commit
c8a77b2
·
1 Parent(s): 0df0d2a

added main data manipulation functions

Browse files
Files changed (1) hide show
  1. utils.py +99 -96
utils.py CHANGED
@@ -1,88 +1,76 @@
1
  import streamlit.components.v1 as components
 
2
  from random import randrange, uniform
3
  import pandas as pd
4
  import joblib
5
  import dill
6
- from omnixai.data.tabular import Tabular
7
-
8
def ChangeButtonColour(widget_label, font_color, background_color='transparent'):
    """Recolor every Streamlit button whose visible label equals widget_label.

    Streamlit has no per-button styling hook, so this injects a zero-size
    HTML component whose script walks the parent page's <button> elements
    and restyles the ones whose innerText matches exactly.

    Args:
        widget_label: Exact button text to match (compared with ==).
        font_color: CSS color applied to the button text.
        background_color: CSS background value, defaults to 'transparent'.
    """
    htmlstr = f"""
        <script>
            var elements = window.parent.document.querySelectorAll('button');
            for (var i = 0; i < elements.length; ++i) {{
                if (elements[i].innerText == '{widget_label}') {{
                    elements[i].style.color ='{font_color}';
                    elements[i].style.background = '{background_color}'
                }}
            }}
        </script>
        """
    # htmlstr is already a str; the original f"{htmlstr}" wrapper was redundant.
    components.html(htmlstr, height=0, width=0)
21
-
22
-
23
def first_five_posneg_indices(response_body, top_n=5):
    """Return indices of the strongest positive and negative SHAP values.

    Args:
        response_body: Response dict exposing
            response_body["explanations"][0]["shap_values"] as a flat list.
        top_n: How many indices to keep per sign. Defaults to 5, preserving
            the behaviour the function is named after.

    Returns:
        [positive_indices, negative_indices], each ordered by descending
        magnitude and truncated to top_n. Zero-valued features appear in
        neither list.
    """
    values = response_body["explanations"][0]["shap_values"]

    # Separate positive and negative values, keep indice as corresponds to key
    positive_dict = {index: val for index, val in enumerate(values) if val > 0}
    negative_dict = {index: val for index, val in enumerate(values) if val < 0}

    # Sort dictionaries based on the magnitude of values
    sorted_positive_indices = [index for index, _ in sorted(positive_dict.items(), key=lambda item: abs(item[1]), reverse=True)]
    sorted_negative_indices = [index for index, _ in sorted(negative_dict.items(), key=lambda item: abs(item[1]), reverse=True)]

    return [sorted_positive_indices[:top_n], sorted_negative_indices[:top_n]]
34
-
35
def get_texts(posneg_indices, feature_texts):
    """Map [positive_indices, negative_indices] to their feature descriptions.

    Returns a (positive_texts, negative_texts) pair of lists, preserving
    the incoming index order.
    """
    pos_indices, neg_indices = posneg_indices[0], posneg_indices[1]
    return (
        [feature_texts[i] for i in pos_indices],
        [feature_texts[i] for i in neg_indices],
    )
39
-
40
-
41
def get_input_values(posneg_indices, data_instance):
    """Look up the raw input value for each explained feature index.

    Columns 7..11 of the instance hold 0/1 flags and are rendered as the
    strings 'True'/'False'; every other column is returned unchanged.

    Returns the value lists for the first and second index group.
    """
    row = data_instance["instances"][0]

    def render(idx):
        # Flag columns become capitalized booleans for display.
        if 7 <= idx < 12:
            return str(bool(row[idx])).capitalize()
        return row[idx]

    outputs = [[render(idx) for idx in group] for group in posneg_indices]
    return outputs[0], outputs[1]
54
-
55
# Example explain-endpoint response for local development: one prediction
# plus per-feature SHAP values and the explainer's expected value.
response = {
    "predictions": [
        True
    ],
    "explanations": [
        {
            "shap_values": [
                -0.020634920634920784,
                -0.053968253968253166,
                -0.0015873015873012486,
                0,
                0.04603174603174587,
                -0.12063492063492065,
                0.8365079365079348,
                -0.16349206349206302,
                0.12222222222222279,
                -0.04444444444444462,
                -0.02444444444444462,
                0.03603174603174587,
            ],
            "expected_value": 0.4
        }
    ]
}

# Maps a feature's position in the raw input row to a display-label prefix;
# positions 7-11 are 0/1 flags (see get_input_values).
feature_texts = {0: "Day out of 30", 1: "Type of transaction: ", 2: "Amount transferred: ", 3: "Initial balance of sender: ", 4: "New balance of sender: ",
                 5: "Initial balance of recipient: ", 6: "New balance of recipient: ", 7: "Sender's balance was exactly credited: ",
                 8: "Receiver's balance was exactly credited: ", 9: "Transaction over 450.000: ", 10: "Frequent receiver of transactions: ", 11: "Receiver is merchant: ", 12: "Sender ID: ", 13: "Receiver ID: "}

# Sample raw model input: a single 14-column transaction row.
example_input = {"instances":[[1,"PAYMENT",9839.64,170136,160296.36,0,0,1,1,0,0,1,84,2424]]}
86
 
87
  def get_fake_certainty():
88
  # Generate a random certainty between 75% and 99%
@@ -91,8 +79,7 @@ def get_fake_certainty():
91
  return formatted_fake_certainty
92
 
93
 
94
- def get_random_suspicious_transaction():
95
- data = pd.read_pickle("data/preprocessed_data.pkl")
96
  suspicious_data=data[data["isFraud"]==1]
97
  max_n=len(suspicious_data)
98
  random_nr=randrange(max_n)
@@ -100,20 +87,36 @@ def get_random_suspicious_transaction():
100
  return suspicous_transaction
101
 
102
 
103
def get_model_certainty(data_point):
    """Return the model's positive-class probability for one datapoint,
    formatted as a percentage string (e.g. '97.12345%').

    Loads the persisted classifier and its feature transformer from disk on
    every call — acceptable for a demo, but repeated calls re-read both files.
    """
    # load the trained model
    model = joblib.load('model/model.joblib')
    # load transformer
    with open("transformer/transformer.dill", "rb") as f:
        transformer = dill.load(f)

    # create tabular object of datapoint and apply transformation
    # NOTE(review): categorical_columns=[1] assumes the transaction-type
    # column sits at position 1 — confirm against the caller's row layout.
    sample = Tabular(data_point, categorical_columns=[1])
    transformed = transformer.transform(sample)

    # get model certainty for sample and transform it to smth intelligble
    probability = model.predict_proba(transformed)
    positive_class_probability = probability[:, 1][0]
    formatted_probability = "{:.5%}".format(positive_class_probability)
    # Return the formatted probability
    return formatted_probability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit.components.v1 as components
2
+ import streamlit as st
3
  from random import randrange, uniform
4
  import pandas as pd
5
  import joblib
6
  import dill
7
+ import logging
8
+ import numpy as np
9
+
10
# Display names for the 14 raw transaction columns, in row order.
COL_NAMES = ['Time step',
             'Transaction type',
             'Amount transferred',
             'Sender\'s initial balance',
             'Sender\'s new balance',
             'Recipient\'s initial balance',
             'Recipient\'s new balance',
             "Sender exactly credited",
             "Receiver exactly credited",
             'Amount > 450 000',
             'Frequent receiver',
             'Merchant receiver',
             'Sender ID',
             'Receiver ID']

# Labels keyed by feature position AFTER transformation() replaces the
# transaction-type column with five one-hot columns: 0-12 are the remaining
# raw columns, 13-17 the one-hot transaction types.
feature_texts = {0: "Time step: ", 1: "Amount transferred: ", 2: "Initial balance of sender: ", 3: "New balance of sender: ",
                 4: "Initial balance of recipient: ", 5: "New balance of recipient: ", 6: "Sender's balance was exactly credited: ",
                 7: "Receiver's balance was exactly credited: ", 8: "Transaction over 450.000: ", 9: "Frequent receiver of transactions: ", 10: "Receiver is merchant: ", 11: "Sender ID: ", 12: "Receiver ID: ",
                 13: "Transaction type is Cash out", 14: "Transaction type is Transfer", 15: "Transaction type is Payment", 16: "Transaction type is Cash in", 17: "Transaction type is Debit"}

# One-hot encoding order for the transaction-type category (see transformation).
CATEGORIES = np.array(['CASH_OUT', 'TRANSFER', 'PAYMENT', 'CASH_IN', 'DEBIT'])
31
+
32
def transformation(input, categories):
    """One-hot encode the transaction-type entry of a raw input row.

    NOTE: mutates `input` in place — the category value at position 1 is
    removed and five 0/1 indicators (one per entry of `categories`) are
    appended. The same (mutated) list object is also returned. An unknown
    category yields all-zero indicators.
    """
    selected = np.array(input[1])
    # Drop the categorical column, then append its one-hot encoding.
    del input[1]
    indicators = np.zeros(5, dtype=int)
    indicators[np.where(categories == selected)[0]] = 1
    input.extend(indicators.tolist())
    return input
41
+
42
def get_request_body(datapoint):
    """Build a Deeploy-style request body from a one-row DataFrame.

    NumPy integer scalars are downcast to plain Python ints so the payload
    is JSON-serializable; all other values pass through unchanged.
    """
    raw_values = datapoint.iloc[0].tolist()
    converted = []
    for value in raw_values:
        if isinstance(value, (np.int32, np.int64)):
            converted.append(int(value))
        else:
            converted.append(value)
    return {'instances': [converted]}
47
+
48
+
49
def get_explainability_texts(shap_values, feature_texts):
    """Select the features that push the prediction toward the positive class.

    Returns:
        (texts, indices) — texts are the feature_texts entries for positive
        SHAP values ordered by descending magnitude (at most 8 kept);
        indices is the full ordered index list, untruncated.
    """
    # Keep only positively-contributing features, remembering their positions.
    contributions = [(i, v) for i, v in enumerate(shap_values) if v > 0]
    # Largest-magnitude contributions first.
    contributions.sort(key=lambda pair: abs(pair[1]), reverse=True)
    sorted_positive_indices = [i for i, _ in contributions]
    positive_texts = [feature_texts[i] for i in sorted_positive_indices[:8]]
    return positive_texts, sorted_positive_indices
58
+
59
+
60
def get_explainability_values(pos_indices, datapoint):
    """Look up the displayed input value for each explained feature index.

    Args:
        pos_indices: Feature indices in the post-one-hot layout, ordered by
            SHAP magnitude (as produced by get_explainability_texts).
        datapoint: One-row DataFrame holding the raw transaction.

    Returns:
        At most 8 values; flag columns are rendered as 'True'/'False'.
    """
    data = datapoint.iloc[0].tolist()
    # transformation() rewrites the row into the model's one-hot layout so
    # that pos_indices (which refer to that layout) address the right slots.
    # (Indexing transformed_data directly — the original indexed `data`,
    # which only worked because transformation mutates its argument.)
    transformed_data = transformation(input=data, categories=CATEGORIES)
    vals = []
    for idx in pos_indices:
        # Indices 7-10 and 13-17 hold 0/1 flags; show them as booleans.
        # NOTE(review): index 6 ("Sender's balance was exactly credited" per
        # feature_texts) also looks like a flag — confirm whether the range
        # should start at 6.
        if idx in range(7, 11) or idx in range(13, 18):
            val = str(bool(transformed_data[idx])).capitalize()
        else:
            val = transformed_data[idx]
        vals.append(val)
    if len(vals) > 8:
        vals = vals[:8]
    return vals
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  def get_fake_certainty():
76
  # Generate a random certainty between 75% and 99%
 
79
  return formatted_fake_certainty
80
 
81
 
82
+ def get_random_suspicious_transaction(data):
 
83
  suspicious_data=data[data["isFraud"]==1]
84
  max_n=len(suspicious_data)
85
  random_nr=randrange(max_n)
 
87
  return suspicous_transaction
88
 
89
 
90
def send_evaluation(client, deployment_id, request_log_id, prediction_log_id, evaluation_input):
    """Send evaluation (user feedback on a prediction) to Deeploy.

    Shows a Streamlit spinner while the client call is in flight.

    Returns:
        True when the evaluation was accepted; False when the call raised
        (the error is logged and surfaced in the Streamlit UI).
    """
    try:
        with st.spinner("Submitting response..."):
            # Forward the feedback for the logged request/prediction pair.
            client.evaluate(deployment_id, request_log_id, prediction_log_id, evaluation_input)
            return True
    except Exception as e:
        logging.error(e)
        st.error(
            # Trailing spaces matter: the fragments are concatenated into one message.
            "Failed to submit feedback. "
            + "Check whether you are using the right model URL and Token. "
            + "Contact Deeploy if the problem persists."
        )
        st.write(f"Error message: {e}")
        return False
105
+
106
+
107
def get_model_url():
    """Render the model-URL text area and parse ids out of the entered URL.

    Returns:
        (model_url, workspace_id, deployment_id); the ids are empty strings
        when the URL does not contain enough path segments.
    """
    model_url = st.text_area(
        "Model URL (default is the demo deployment)",
        "https://api.app.deeploy.ml/workspaces/708b5808-27af-461a-8ee5-80add68384c7/deployments/ac56dbdf-ba04-462f-aa70-5a0d18698e42/",
        height=125,
    )
    # Expected layout: https://host/workspaces/<workspace_id>/deployments/<deployment_id>/
    segments = model_url.split("/")
    workspace_id, deployment_id = "", ""
    if len(segments) > 6:
        workspace_id = segments[4]
        deployment_id = segments[6]
    return model_url, workspace_id, deployment_id
122
+