Spaces:

Anniek
/

XAI

Runtime error

App Files Files Community

Anniek committed on Apr 4, 2023

Commit

b0ff21d

1 Parent(s): ae67462

Upload 7 files

Browse files

Files changed (7) hide show

pages/1-explanationpage.py +103 -0
pages/2_SHAP.py +196 -0
pages/3_DecisionTree.py +264 -0
pages/4_counterfactual.py +215 -0
pages/5_visualMap.py +179 -0
pages/6_finalpage.py +47 -0
pages/7_thankyou.py +13 -0

pages/1-explanationpage.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import streamlit as st
+from uuid import uuid4
+from streamlit_extras.switch_page_button import switch_page
+import random
+import pandas as pd
+import xgboost as xgb
+import copy
+header1, header2, header3 = st.columns([1,2,1])
+body1, body2, body3 =st.columns([1,2,1])
+footer1, footer2, footer3 =st.columns([1,2,1])
+if 'nextPage' not in st.session_state:
+    st.session_state.nextPage = random.randint(0, len(st.session_state.pages)-1)
+# st.write(st.session_state.nextPage)
+@st.cache_data
+def loadData():
+    train_df = pd.read_csv('assets/train_df.csv')
+    test_df = pd.read_csv('assets/test_df.csv')
+    test_with_names = pd.read_csv('assets/test_with_names.csv')
+   # test_with_names.drop('PassengerId', axis=1, inplace=True)
+    X_train = train_df.drop("Survived", axis=1)
+    Y_train = train_df["Survived"]
+    X_test = test_df.drop('PassengerId', axis=1)
+    X_test_names = test_with_names.copy()
+    title_df = pd.DataFrame({'Title indices': [1,2,3,4,5],
+                             'Title': ['Mr', 'Miss', 'Mrs', 'Master', 'Rare'] })
+    gender_df = pd.DataFrame({'Gender indices': [0,1],
+                             'Sex': ['Male', 'Female'] })
+    ports_df = pd.DataFrame({'Ports indices': [0,1,2],
+                             'Embarked': ['Southampton', 'Cherbourg', 'Quenstown'] })
+    return X_train, Y_train, X_test, X_test_names, title_df, gender_df, ports_df, train_df
+if 'X_train' not in st.session_state:
+    st.session_state.X_train, st.session_state.Y_train, st.session_state.X_test, st.session_state.X_test_names, st.session_state.title_df, st.session_state.gender_df, st.session_state.ports_df, st.session_state.train_df = loadData()
+    # st.dataframe(st.session_state.X_train)
+    # st.session_state.X_train, st.session_state.Y_train, st.session_state.X_test, st.session_state.X_test_names= loadData()
+# Page title, rendered in the wide middle column of the header row.
+with header2:
+    st.title("Who survived and why?")
+# Introductory text: historical background, what the experiment consists of,
+# and the lead-in to the demographic questionnaire that follows.
+with body2:
+    st.header("The Titanic")
+    st.markdown("In the year 1912, the Titanic left from Southampton to New York City, but it never arrived. On April 15, it crashed into an iceberg and sunk. Of the estimated 2,224 passengers and crew aboard, more than 1,500 died, making it the deadliest sinking of a single ship up to that time. ")
+    st.image('assets/titanic.jpg')
+    st.header('Explanation experiment')
+    st.markdown('''In this experiment we will show you two different profiles of passengers.
+    Using Machine Learning (ML) we will show a prediction whether they would have survived the disaster.
+    This prediction is accompanied by each time a different type of explanation.''')
+    st.markdown("After seeing 2 profiles, you will be asked to evaluate the explanation you have just seen.")
+    st.subheader('Demographic information')
+    st.markdown("Before you start with the study we would like to ask you to first answer these questions")
+with footer2:
+    with st.form("demographic_form", clear_on_submit=True):
+        gender = st.radio("How do you identify your gender", ('Female', 'Male', 'Non-binary', 'Other', 'Prefer not to say'))
+        age  = st.radio("How old are you?", ('18-25', '26-35', '36-45', '46-55', '56-65', '66-75', '75+'))
+        educationlevel = st.radio("What is your highest level of education?",
+        ('elementary school', 'high school', 'MBO', 'HBO', 'University'))
+        st.markdown('**AI literacy**')
+        st.markdown('Please rate to what extent you have the skills/knowledge listed below. 0 means that he ability is hardly or not at all pronounced, whereas a value of 10 means that the ability is very well or almost perfectly pronounced')
+        q1 = st.slider('I know the most important concepts of the topic "artificial intelligence"', 0, 10)
+        q2 = st.slider("I know definitions of artificial intelligence", 0, 10 )
+        q3 = st.slider("I can assess what the limitations and opportunities of using an AI are", 0, 10)
+        q4 = st.slider("I can assess what advantages and disadvantages the  use of an artificial intelligence entails", 0, 10)
+        q5 = st.slider("I can think of new uses for AI.", 0, 10)
+        q6 = st.slider("I can imagine possible future uses of AI", 0, 10)
+        st.markdown('''On the next page you will see a profile of one of the passengers of the Titanic,
+        a prediction of whether they would have survived and an explanation for why the model made this prediction. Have a look at this and then generate a new profile by clicking on the button.
+        You can look at 2 profiles, next you will be asked to evaluate the explanation.
+        These steps will be repeated in total 4 times after which you will be asked some final questions.  ''')
+        submitted = st.form_submit_button("Start the experiment")
+        if submitted:
+            st.session_state.oocsi.send('EngD_HAII_demographics', {
+                    'participant_ID': st.session_state.participantID,
+                    'gender': gender,
+                    'age': age,
+                    'educationLevel': educationlevel,
+                    'q1': q1,
+                    'q2': q2,
+                    'q3': q3,
+                    'q4': q4,
+                    'q5': q5,
+                    'q6': q6,
+                    })
+        # if st.button("Start the experiment "):
+            switch_page(st.session_state.pages[st.session_state.nextPage])

pages/2_SHAP.py ADDED Viewed

	@@ -0,0 +1,196 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from oocsi_source import OOCSI
+from uuid import uuid4
+from streamlit_extras.switch_page_button import switch_page
+import random
+import shap
+from IPython.display import display_html
+import xgboost as xgb
+import matplotlib.pyplot as plt
+# st.markdown("""<style>
+# .stSlider {
+#     padding-bottom: 20px;
+#     }
+#     </style> """,
+#     unsafe_allow_html=True)
+#Delete this page from the array of pages to visit, this way it cannot be visited twice
+if 'profile1' not in st.session_state:
+    st.session_state.pages.remove("SHAP")
+    st.session_state.profile1= 'deleted'
+    if (len(st.session_state.pages)>0):
+        st.session_state.nextPage1 = random.randint(0, len(st.session_state.pages)-1)
+        st.session_state.lastQuestion= 'no'
+    else:
+        st.session_state.lastQuestion= 'yes'
+if 'index1' not in st.session_state:
+    st.session_state.index1= 0
+if 'profileIndex' not in st.session_state:
+    st.session_state.profileIndex= st.session_state.profileIndices[st.session_state.index1]
+header1, header2, header3 = st.columns([1,2,1])
+characteristics1, characteristics2, characteristics3 = st.columns([1,2,1])
+prediction1, prediction2, prediction3 =st.columns([1,2,1])
+explanation1, explanation2, explanation3 = st.columns([1,5,1])
+footer1, footer2, footer3 =st.columns([1,2,1])
+evaluation1, evaluation2, evaluation3 = st.columns([1,2,1])
+name= st.session_state.X_test_names.loc[st.session_state.profileIndex, "Name"]
+@st.cache_resource
+def trainModel(X_train,Y_train):
+    model = xgb.XGBClassifier().fit(X_train, Y_train)
+    return model
+@st.cache_resource
+def getSHAPvalues(_model,X_train, Y_train, X_test):
+    # compute SHAP values
+    explainer = shap.Explainer(_model, X_test)
+    shap_values = explainer(X_test)
+    return shap_values
+def shapPlot(X_test, _shap_values):
+    return shap.plots.waterfall(shap_values[st.session_state.profileIndex])
+# Page heading, a short description of SHAP values, and the name of the
+# passenger currently shown. The model is also obtained here so that the
+# sections below can use it.
+with header2:
+    st.header('Explanation - SHAP Values')
+    st.markdown('''  The SHAP value algorithm (SHapley Additive exPlanations) is a way to reverse-engineer the output of any predictive machine learning model.
+    the technique helps to understand the decision took by a complex model. The classical models will typically answer the question 'how much' whereas the SHAP
+    model will focus on the 'why'.
+    Finally, the representation of the SHAP value will show how much each feature are contributing to the final prediction made by the model. For the Titanic dataset, each feature
+    will analyse each the contribution of each will be presented for different persons explaining the reason why this person survived the shipwreck or not.
+    ''')
+    st.subheader(name, anchor='top')
+    # trainModel is decorated with st.cache_resource.
+    XGBmodel= trainModel(st.session_state.X_train, st.session_state.Y_train)
+with characteristics2:
+    # initialize list of lists
+    data = st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1)
+    # Create the pandas DataFrame
+    df = pd.DataFrame(data, columns=st.session_state.X_test.columns)
+    st.dataframe(df)
+with prediction2:
+    # st.header("Prediction")
+    prediction =  XGBmodel.predict(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    probability = XGBmodel.predict_proba(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    if prediction == 0:
+        prob = round((probability[0][0]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :red[**not survive**]".format(prob, name) )
+    else:
+        prob = round((probability[0][1]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :green[**survive**]".format(prob, name) )
+with explanation2:
+    st.subheader("Explanation")
+    # with st.spinner("Please be patient, we are generating a new explanation"):
+    shap_values= getSHAPvalues(XGBmodel, st.session_state.X_train, st.session_state.Y_train, st.session_state.X_test)
+    st.set_option('deprecation.showPyplotGlobalUse', False)
+    fig = shap.plots.waterfall(shap_values[st.session_state.profileIndex])
+    st.pyplot(fig, bbox_inches='tight')
+    data_indices = pd.concat([d.reset_index(drop=True) for d in [st.session_state.ports_df, st.session_state.title_df, st.session_state.gender_df]], axis=1)
+    # st.dataframe(st.session_state.ports_df)
+    # st.dataframe(st.session_state.title_df)
+    # st.dataframe(st.session_state.gender_df)
+    st.dataframe(data_indices)
+with footer2:
+    if (st.session_state.index1 < len(st.session_state.profileIndices)-1):
+        if st.button("New profile"):
+            st.session_state.index1 = st.session_state.index1+1
+            st.session_state.profileIndex = st.session_state.profileIndices[st.session_state.index1]
+            st.experimental_rerun()
+    else:
+        def is_user_active():
+            if 'user_active1' in st.session_state.keys() and st.session_state['user_active1']:
+                return True
+            else:
+                return False
+        if is_user_active():
+        # st.markdown("You have reached the end of the profiles")
+        # if st.button("Continue to evaluation"):
+        #     st.write(" ")
+            with st.form("my_form1", clear_on_submit=True):
+                st.subheader("Evaluation")
+                st.write("These questions only ask for your opinion about this specific explanation")
+                q1 = st.select_slider(
+                '**1**- From the explanation, I **understand** how the algorithm works:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q2 = st.select_slider(
+                '**2**- This explanation of how the algorithm works is **satisfying**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q3 = st.select_slider(
+                '**3**- This explanation of how the algorithm works has **sufficient detail**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q4 = st.select_slider(
+                '**4**- This explanation of how the algorithm works seems **complete**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q5 = st.select_slider(
+                '**5**- This explanation of how the algorithm works **tells me how to use it**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q6 = st.select_slider(
+                '**6**- This explanation of how the algorithm works is **useful to my goals**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q7 = st.select_slider(
+                '**7**- This explanation of the algorithm shows me how **accurate** the algorithm is:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q8 = st.select_slider(
+                '**8**- This explanation lets me judge when I should **trust and not trust** the algorithm:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                # Every form must have a submit button.
+                submitted = st.form_submit_button("Submit")
+                if submitted:
+                    st.write("question 1", q1)
+                    st.session_state.oocsi.send('EngD_HAII', {
+                        'participant_ID': st.session_state.participantID,
+                        'type of explanation': 'SHAP',
+                        'q1': q1,
+                        'q2': q2,
+                        'q3': q3,
+                        'q4': q4,
+                        'q5': q5,
+                        'q6': q6,
+                        'q7': q7,
+                        'q8': q8,
+                        })
+                    if (st.session_state.lastQuestion =='yes'):
+                        switch_page('finalPage')
+                    else:
+                        st.session_state.profileIndex =st.session_state.profileIndices[0]
+                        switch_page(st.session_state.pages[st.session_state.nextPage1])
+        else:
+            if st.button('Continue to evaluation'):
+                st.session_state['user_active1']=True
+                st.experimental_rerun()

pages/3_DecisionTree.py ADDED Viewed

	@@ -0,0 +1,264 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from oocsi_source import OOCSI
+from uuid import uuid4
+from streamlit_extras.switch_page_button import switch_page
+import random
+import dtreeviz
+import xgboost as xgb
+from dtreeviz.trees import dtreeviz
+from sklearn.tree import DecisionTreeClassifier
+import graphviz as graphviz
+from sklearn.datasets import make_moons
+import base64
+# import os
+# os.environ["PATH"] += os.pathsep + 'D:/Program Files (x86)/Graphviz2.38/bin/'
+# st.markdown("""<style>
+# .stSlider {
+#     padding-bottom: 20px;
+#     }
+#     </style> """,
+#     unsafe_allow_html=True)
+#Delete this page from the array of pages to visit, this way it cannot be visited twice
+if 'profile2' not in st.session_state:
+    st.session_state.pages.remove("DecisionTree")
+    st.session_state.profile2= 'deleted'
+    if (len(st.session_state.pages)>0):
+        st.session_state.nextPage2 = random.randint(0, len(st.session_state.pages)-1)
+        st.session_state.lastQuestion= 'no'
+    else:
+        st.session_state.lastQuestion= 'yes'
+if 'index2' not in st.session_state:
+    st.session_state.index2= 0
+if 'profileIndex' not in st.session_state:
+    st.session_state.profileIndex= st.session_state.profileIndices[st.session_state.index2]
+name= st.session_state.X_test_names.loc[st.session_state.profileIndex, "Name"]
+header1, header2, header3 = st.columns([1,2,1])
+characteristics1, characteristics2, characteristics3 = st.columns([1,2,1])
+prediction1, prediction2, prediction3 =st.columns([1,2,1])
+explanation1, explanation2, explanation3 = st.columns([1,2,1])
+footer1, footer2, footer3 =st.columns([1,2,1])
+evaluation1, evaluation2, evaluation3 = st.columns([1,2,1])
+@st.cache_resource
+def loadData():
+    train_df = pd.read_csv('assets/train_df.csv')
+    test_df = pd.read_csv('assets/test_df.csv')
+    X_train = train_df.drop("Survived", axis=1)
+    Y_train = train_df["Survived"]
+    X_test  = test_df.drop("PassengerId", axis=1).copy()
+    return X_train, Y_train, X_test
+@st.cache_resource
+def trainModel(X_train,Y_train):
+    model = xgb.XGBClassifier().fit(X_train, Y_train)
+    return model
+# @st.cache_resource
+def createTree(_model, X_train, Y_train, X_test):
+    # X, y = make_moons(n_samples=20, noise=0.25, random_state=3)
+    # treeclf = DecisionTreeClassifier(random_state=0)
+    # treeclf.fit(X, y)
+    # viz_model= dtreeviz(treeclf, X, y, target_name="Classes",
+    #     feature_names=["f0", "f1"], class_names=["c0", "c1"])
+    # clf = DecisionTreeClassifier(max_depth=3)
+    # clf.fit(X_train, Y_train)
+    # Y_pred = clf.predict(X_test)
+    # acc_decision_tree2 = round(clf.score(X_train, Y_train) * 100, 2)
+    # viz_model = dtreeviz(clf,
+    #                      X_train, Y_train,
+    #                     feature_names=X_train.columns,
+    #                     target_name='Survived',
+    #                     class_names=['Dead', 'Alive'],
+    #                     X=X_test.iloc[1]
+    # )
+    viz_model = dtreeviz(_model,
+                         X_train, Y_train,
+                         tree_index=0,
+                         feature_names=list(X_train.columns),
+                         target_name='Survived',
+                         class_names=['Dead', 'Alive'],
+                         X=X_test.iloc[st.session_state.profileIndex],
+                        #depth_range_to_display=(0, 2),
+                        show_just_path=True,
+                        # orientation ='LR',
+                         )
+    #path = "/assets/images/prediction_path" + str(st.session_state.profileIndex) +".svg"
+    viz_model.save("/assets/images/prediction_path.svg")
+    return viz_model
+def render_svg(svg):
+    """Renders the given svg string."""
+    b64 = base64.b64encode(svg.encode('utf-8')).decode("utf-8")
+    html = r'<img src="data:image/svg+xml;base64,%s"/>' % b64
+    st.write(html, unsafe_allow_html=True)
+# Page heading, a short description of decision trees with an example image,
+# and the name of the passenger currently shown. The model is also obtained
+# here so that the sections below can use it.
+with header2: #header2
+    st.header("Explanation - Decision Tree")
+    st.markdown('''Decision Tree models are a non-parametric supervised learning method
+     commonly used for classification and regression.
+     They are constructed using two kinf of elements: Nodes and branches. At each node (intersection),
+     one of the data features is evaluated to split the observations into different paths.
+    At typical decision example is shown in the graph below.
+    ''')
+    st.image('assets/Decision_tree.jpg',caption = 'Example of a decision tree')
+    st.markdown(''' The Root Node starts the graph. It is usually the variable that splits the more lcearly the data.
+    Then, intermediate nodes are vsisble were different varaibales are evaluated but no final prediction is made yet.
+    Finally, leaf nodes are present where the predicrtions (numerical of categoriacl) are made.
+    For the Titanic dataset, the prediction will be whether the studied person survived the shipwreck.
+     ''')
+    st.subheader(name)
+    # trainModel is decorated with st.cache_resource.
+    XGBmodel= trainModel(st.session_state.X_train, st.session_state.Y_train)
+with characteristics2:
+    # initialize list of lists
+    data = st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1)
+    # Create the pandas DataFrame
+    df = pd.DataFrame(data, columns=st.session_state.X_test.columns)
+    st.dataframe(df)
+with prediction2:
+    prediction =  XGBmodel.predict(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    probability = XGBmodel.predict_proba(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    if prediction == 0:
+        prob = round((probability[0][0]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :red[**not survive**]".format(prob, name) )
+    else:
+        prob = round((probability[0][1]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :green[**survive**]".format(prob, name) )
+with explanation2:
+    st.subheader("Visualization - Decision Tree")
+    # st.markdown('''Decision Tree model are a non-parametric supervised learning method
+    #  commonly used for classification and regression.
+    #  They are constructed using two kinf of elements: Nodes and branches. At each node (intersection),
+    #  one of the data features is evaluated to split the observations into different paths.
+    # At typical decision example is shown in the graph below.
+    # ''')
+    # st.image('assets/Decision_tree.jpg')
+    # st.markdown(''' The Root Node starts the graph. It is usually the variable that splits the more lcearly the data.
+    # Then, intermediate nodes are vsisble were different varaibales are evaluated but no final prediction is made yet.
+    # Finally, leaf nodes are present where the predicrtions (numerical of categoriacl) are made.
+    # For the Titanic dataset, the prediction will be whether the studied person survived the shipwreck.
+    #  ''')
+    with st.spinner("Please be patient, we are generating a new explanation"):
+        viz_model = createTree(XGBmodel, st.session_state.X_train, st.session_state.Y_train, st.session_state.X_test)
+    # st.image("/assets/images/prediction_path.svg", width =200, use_column_width=True)
+    #viz_model.view()
+     # read in svg prediction path and display
+        path = "/assets/images/prediction_path" + str(st.session_state.profileIndex) +".svg"
+    # st.success("Done!")
+    with open("/assets/images/prediction_path.svg", "r") as f:
+        svg = f.read()
+    render_svg(svg)
+    st.text("")
+with footer2:
+    if (st.session_state.index2 < len(st.session_state.profileIndices)-1):
+        if st.button("New profile"):
+            st.session_state.index2 = st.session_state.index2+1
+            st.session_state.profileIndex = st.session_state.profileIndices[st.session_state.index2]
+            st.experimental_rerun()
+    else:
+        def is_user_active():
+            if 'user_active2' in st.session_state.keys() and st.session_state['user_active2']:
+                return True
+            else:
+                return False
+        if is_user_active():
+            # if st.button("Continue to evaluation"):
+            #     st.write(" ")
+            with st.form("my_form2", clear_on_submit=True):
+                st.subheader("Evaluation")
+                st.write("These questions only ask for your opinion about this specific explanation")
+                q1 = st.select_slider(
+                '**1**- From the explanation, I **understand** how the algorithm works:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q2 = st.select_slider(
+                '**2**- This explanation of how the algorithm works is **satisfying**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q3 = st.select_slider(
+                '**3**- This explanation of how the algorithm works has **sufficient detail**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q4 = st.select_slider(
+                '**4**- This explanation of how the algorithm works seems **complete**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q5 = st.select_slider(
+                '**5**- This explanation of how the algorithm works **tells me how to use it**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q6 = st.select_slider(
+                '**6**- This explanation of how the algorithm works is **useful to my goals**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q7 = st.select_slider(
+                '**7**- This explanation of the algorithm shows me how **accurate** the algorithm is:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q8 = st.select_slider(
+                '**8**- This explanation lets me judge when I should **trust and not trust** the algorithm:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                # Every form must have a submit button.
+                submitted = st.form_submit_button("Submit")
+                if submitted:
+                    # st.write("question 1", q1)
+                    st.session_state.oocsi.send('EngD_HAII', {
+                        'participant_ID': st.session_state.participantID,
+                        'type of explanation': 'Decision tree',
+                        'q1': q1,
+                        'q2': q2,
+                        'q3': q3,
+                        'q4': q4,
+                        'q5': q5,
+                        'q6': q6,
+                        'q7': q7,
+                        'q8': q8,
+                        })
+                    if (st.session_state.lastQuestion =='yes'):
+                        switch_page('finalPage')
+                    else:
+                        st.session_state.profileIndex =st.session_state.profileIndices[0]
+                        switch_page(st.session_state.pages[st.session_state.nextPage2])
+        else:
+            if st.button('Continue to evaluation'):
+                st.session_state['user_active2']=True
+                st.experimental_rerun()

pages/4_counterfactual.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import copy
+from oocsi_source import OOCSI
+from uuid import uuid4
+from streamlit_extras.switch_page_button import switch_page
+import random
+# import shap
+import dice_ml
+from dice_ml.utils import helpers
+import xgboost as xgb
+import matplotlib.pyplot as plt
+from sklearn.ensemble import RandomForestClassifier
+# st.markdown("""<style>
+# .stSlider {
+#     padding-bottom: 20px;
+#     }
+#     </style> """,
+#     unsafe_allow_html=True)
+# st.session_state.Y_train
+# st.session_state.X_test
+# st.session_state.X_test_names
+#Delete this page from the array of pages to visit, this way it cannot be visited twice
+if 'profile3' not in st.session_state:
+    st.session_state.pages.remove("counterfactual")
+    st.session_state.profile3= 'deleted'
+    if (len(st.session_state.pages)>0):
+        st.session_state.nextPage3 = random.randint(0, len(st.session_state.pages)-1)
+        st.session_state.lastQuestion= 'no'
+    else:
+        st.session_state.lastQuestion= 'yes'
+if 'index3' not in st.session_state:
+    st.session_state.index3= 0
+if 'profileIndex' not in st.session_state:
+    st.session_state.profileIndex= st.session_state.profileIndices[st.session_state.index3]
+header1, header2, header3 = st.columns([1,2,1])
+characteristics1, characteristics2, characteristics3 = st.columns([1,2,1])
+prediction1, prediction2, prediction3 =st.columns([1,2,1])
+explanation1, explanation2, explanation3 = st.columns([1,10,1])
+footer1, footer2, footer3 =st.columns([1,2,1])
+evaluation1, evaluation2, evaluation3 = st.columns([1,2,1])
+name= st.session_state.X_test_names.loc[st.session_state.profileIndex, "Name"]
+@st.cache_resource
+def trainModel(X_train,Y_train):
+    model_1 = RandomForestClassifier().fit(X_train, Y_train)  ## Random forest because XGBoost doesn't work with counterfactuals
+    return model_1
+@st.cache_resource
+def getcounterfactual_values(_model,X_prediction, X_train):
+    # compute counterfactual values
+    train_df = pd.read_csv('assets/train_df.csv')
+    continous_col=["Age", 'Fare', 'Siblings_spouses', 'Title', 'Parents_children','relatives' ]
+    # test_df_counter = X_test.copy()
+    # test_df_counter['Survived'] = X_prediction
+    dice_data = dice_ml.Data(dataframe=train_df,continuous_features=continous_col, outcome_name='Survived')
+    dice_model= dice_ml.Model(model=_model, backend="sklearn")
+    explainer = dice_ml.Dice(dice_data, dice_model, method="random")
+    return explainer
+def Counterfactualsplot(X_test, explainer):
+    e1 = explainer.generate_counterfactuals(
+        X_test[1:2],total_CFs=4, desired_class="opposite",
+         features_to_vary = ['Age','Pclass', 'Sex','Siblings_spouses', 'Parents_children', 'Embarked', 'relatives',  'Title']  ) ## Deck, Fare
+    e1.cf_examples_list[0].final_cfs_df.to_csv(path_or_buf=rf'assets\counterfactuals_{name}.csv', index=False)
+    counter_csv = pd.read_csv(f'assets\counterfactuals_{name}.csv')
+    return st.dataframe(counter_csv, width=10000)
+# Page heading, a short description of counterfactual explanations with an
+# illustration, and the name of the passenger currently shown. The model is
+# also obtained here so that the sections below can use it.
+with header2:
+    st.header("Explanation - Counterfactuals")
+    st.markdown('''A counterfactual explanation describes a situation where if a specific event had not occurred, the conclusion would have been different
+    and a specific outcome would not have occurred. In machine learning, counterfactuals are used to explain prediction of individuals instances. The prediction
+    of the model will be analysed and certain conditions/features that created this prediction will be modified to obtain an different outcome for the model.''')
+    st.markdown('''As displayed in the graph below, the relation betwwen the inputs andthe prediciton is modified by the feature values that creates a simple causal
+    relationshhip betwen inputs and predictions.
+''')
+    st.image('assets/counterfactual.jpg', caption = 'Causal relation between inputs and predictions', use_column_width = 'always' )
+    st.markdown('''A counterfactual explanation of a prediction will then describe the smallest amount of change that is necessary to make to change the output
+    prediction to a predefine one.''')
+    st.subheader(name, anchor='top')
+    # trainModel is decorated with st.cache_resource.
+    random_forest= trainModel(st.session_state.X_train, st.session_state.Y_train)
+with characteristics2:
+    # initialize list of lists
+    data = st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1)
+    # Create the pandas DataFrame
+    df = pd.DataFrame(data, columns=st.session_state.X_test.columns)
+    st.dataframe(df)
+with prediction2:
+    # st.header("Prediction")
+    prediction =  random_forest.predict(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    prediction_all = random_forest.predict(st.session_state.X_test.values)
+    probability = random_forest.predict_proba(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    if prediction == 0:
+        prob = round((probability[0][0]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :red[**not survive**]".format(prob, name) )
+    else:
+        prob = round((probability[0][1]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :green[**survive**]".format(prob, name) )
+with explanation2:
+    st.subheader("Explanation")
+    # NOTE(review): the text below reads like a placeholder that is shown to
+    # participants - confirm the intended copy.
+    st.markdown("counterfactual, more text here")
+    # with st.spinner("Please be patient, we are generating a new explanation"):
+    # getcounterfactual_values and Counterfactualsplot are defined outside this section.
+    explainer= getcounterfactual_values(random_forest, prediction_all, st.session_state.X_test)
+    st.set_option('deprecation.showPyplotGlobalUse', False)
+    # NOTE(review): e1 is not used again in the visible code; presumably
+    # Counterfactualsplot renders the plot as a side effect - confirm.
+    e1=Counterfactualsplot(st.session_state.X_test, explainer)
+    # Show the index-to-label lookup tables (ports, titles, gender) side by side
+    data_indices = pd.concat([d.reset_index(drop=True) for d in [st.session_state.ports_df, st.session_state.title_df, st.session_state.gender_df]], axis=1)
+    st.dataframe(data_indices)
+with footer2:
+    if (st.session_state.index3 < len(st.session_state.profileIndices)-1):
+        if st.button("New profile"):
+            st.session_state.index3 = st.session_state.index3+1
+            st.session_state.profileIndex = st.session_state.profileIndices[st.session_state.index3]
+            st.experimental_rerun()
+    else:
+        def is_user_active():
+            if 'user_active3' in st.session_state.keys() and st.session_state['user_active3']:
+                return True
+            else:
+                return False
+        if is_user_active():
+        # if st.button("Continue to evaluation"):
+        #     st.write(" ")
+            with st.form("my_form3", clear_on_submit=True):
+                st.subheader("Evaluation")
+                st.write("These questions only ask for your opinion about this specific explanation")
+                q1 = st.select_slider(
+                '**1**- From the explanation, I **understand** how the algorithm works:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q2 = st.select_slider(
+                '**2**- This explanation of how the algorithm works is **satisfying**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q3 = st.select_slider(
+                '**3**- This explanation of how the algorithm works has **sufficient detail**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q4 = st.select_slider(
+                '**4**- This explanation of how the algorithm works seems **complete**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q5 = st.select_slider(
+                '**5**- This explanation of how the algorithm works **tells me how to use it**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q6 = st.select_slider(
+                '**6**- This explanation of how the algorithm works is **useful to my goals**:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q7 = st.select_slider(
+                '**7**- This explanation of the algorithm shows me how **accurate** the algorithm is:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                q8 = st.select_slider(
+                '**8**- This explanation lets me judge when I should **trust and not trust** the algorithm:',
+                options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+                # Every form must have a submit button.
+                submitted = st.form_submit_button("Submit")
+                if submitted:
+                    # st.write("question 1", q1)
+                    st.session_state.oocsi.send('EngD_HAII', {
+                        'participant_ID': st.session_state.participantID,
+                        'type of explanation': 'counterfactual',
+                        'q1': q1,
+                        'q2': q2,
+                        'q3': q3,
+                        'q4': q4,
+                        'q5': q5,
+                        'q6': q6,
+                        'q7': q7,
+                        'q8': q8,
+                        })
+                    if (st.session_state.lastQuestion =='yes'):
+                        switch_page('finalPage')
+                    else:
+                        st.session_state.profileIndex =st.session_state.profileIndices[0]
+                        switch_page(st.session_state.pages[st.session_state.nextPage3])
+        else:
+            if st.button('Continue to evaluation'):
+                st.session_state['user_active3']=True
+                st.experimental_rerun()

pages/5_visualMap.py ADDED Viewed

	@@ -0,0 +1,179 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from oocsi_source import OOCSI
+from uuid import uuid4
+from streamlit_extras.switch_page_button import switch_page
+import random
+import shap
+import xgboost as xgb
+import matplotlib.pyplot as plt
+import streamlit.components.v1 as components
+# Delete this page from the array of pages to visit, this way it cannot be visited twice.
+# The 'profile4' key doubles as a run-once guard so the removal is not repeated on reruns.
+if 'profile4' not in st.session_state:
+    st.session_state.pages.remove("visualMap")
+    st.session_state.profile4= 'deleted'
+    if (len(st.session_state.pages)>0):
+        # Pick a random index into the remaining pages; it is used after the
+        # evaluation form is submitted to choose the next page.
+        st.session_state.nextPage4 = random.randint(0, len(st.session_state.pages)-1)
+        st.session_state.lastQuestion= 'no'
+    else:
+        # No pages left: after submitting, the form routes to the final page instead
+        st.session_state.lastQuestion= 'yes'
+# Position within profileIndices for this page
+if 'index4' not in st.session_state:
+    st.session_state.index4= 0
+# Only initialised when no profile is selected yet; an existing selection is kept
+if 'profileIndex' not in st.session_state:
+    st.session_state.profileIndex= st.session_state.profileIndices[st.session_state.index4]
+# Page layout: each row is split into three columns and the content below is
+# placed in the middle one (the explanation row uses a wider middle column).
+header1, header2, header3 = st.columns([1,2,1])
+characteristics1, characteristics2, characteristics3 = st.columns([1,2,1])
+prediction1, prediction2, prediction3 =st.columns([1,2,1])
+explanationheader1,explanationheader2, explanationheader3 = st.columns([1,2,1])
+explanation1, explanation2, explanation3 = st.columns([1,6,1])
+footer1, footer2, footer3 =st.columns([1,2,1])
+evaluation1, evaluation2, evaluation3 = st.columns([1,2,1])
+# Name of the selected profile, looked up by index label in the "Name" column
+name= st.session_state.X_test_names.loc[st.session_state.profileIndex, "Name"]
+@st.cache_resource
+def trainModel(X_train,Y_train):
+    model = xgb.XGBClassifier().fit(X_train, Y_train)
+    return model
+@st.cache_resource
+def getSHAPvalues(_model,X_train, Y_train, X_test):
+    # compute SHAP values
+    explainer = shap.Explainer(_model, X_test)
+    shap_values = explainer(X_test)
+    return shap_values
+def shapPlot(X_test, _shap_values):
+    return shap.plots.waterfall(shap_values[st.session_state.profileIndex])
+with header2:
+    st.header('Visual Method for XAI')
+    st.markdown('''In this part, a new method of Explainability was implemented using more visual techniques for communicating of the model
+    predictions and the features influence. The values showed when clicked on each feature (title, Age, deck, ...) were obtained using the SHAP algorithm.
+    Let yourself play with it and tell us how easy it was to understand the model prediciton and the influence of the features!
+        ''')
+    st.markdown("Click on the image to see how each attribute contributed and hover over them to see the SHAP values")
+    # st.subheader(name, anchor='top')
+    st.write("For debugging:")
+    st.write(st.session_state.participantID)
+    XGBmodel= trainModel(st.session_state.X_train, st.session_state.Y_train)
+with characteristics2:
+    # Single-row 2-D array holding the features of the selected profile
+    data = st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1)
+    # One-row DataFrame with the original column names; it is not rendered
+    # on this page because the st.dataframe call below is commented out.
+    df = pd.DataFrame(data, columns=st.session_state.X_test.columns)
+    # st.dataframe(df)
+with prediction2:
+    st.subheader("Prediction")
+    # Predicted class and class probabilities for the selected profile (one-row input)
+    prediction =  XGBmodel.predict(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    probability = XGBmodel.predict_proba(st.session_state.X_test.iloc[st.session_state.profileIndex].values.reshape(1, -1))
+    # Report the probability of whichever class was predicted:
+    # column 0 when the prediction is 0, column 1 otherwise.
+    if prediction == 0:
+        prob = round((probability[0][0]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :red[**not survive**]".format(prob, name) )
+    else:
+        prob = round((probability[0][1]*100),2)
+        st.markdown("The model predicts with {}% probability  that {}  will :green[**survive**]".format(prob, name) )
+# with explanationheader2:
+#     st.subheader("Explanation")
+#     st.markdown("Click on the image to see how each attribute contributed and hover over them to see the SHAP values")
+with explanation2:
+    # Embed the Observable notebook cells (the button view and chart2) in a fixed-height iframe.
+    # NOTE(review): the URL is fixed and does not include the selected profile -
+    # confirm the chart is meant to be the same for every participant.
+    components.iframe("https://observablehq.com/embed/d177ef99668b6553@1065?cells=viewof+button%2Cchart2", scrolling=False, height=683)
+with footer2:
+    def is_user_active():
+        if 'user_active4' in st.session_state.keys() and st.session_state['user_active4']:
+            return True
+        else:
+            return False
+    # if st.button('press here to edit'):
+    if is_user_active():
+        with st.form("my_form4", clear_on_submit=True):
+            st.subheader("Evaluation")
+            st.write("These questions only ask for your opinion about this specific explanation")
+            q1 = st.select_slider(
+            '**1**- From the explanation, I **understand** how the algorithm works:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            q2 = st.select_slider(
+            '**2**- This explanation of how the algorithm works is **satisfying**:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            q3 = st.select_slider(
+            '**3**- This explanation of how the algorithm works has **sufficient detail**:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            q4 = st.select_slider(
+            '**4**- This explanation of how the algorithm works seems **complete**:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            q5 = st.select_slider(
+            '**5**- This explanation of how the algorithm works **tells me how to use it**:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            q6 = st.select_slider(
+            '**6**- This explanation of how the algorithm works is **useful to my goals**:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            q7 = st.select_slider(
+            '**7**- This explanation of the algorithm shows me how **accurate** the algorithm is:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            q8 = st.select_slider(
+            '**8**- This explanation lets me judge when I should **trust and not trust** the algorithm:',
+            options=['totally disagree', 'disagree', 'neutral' , 'agree', 'totally agree'])
+            # Every form must have a submit button.
+            submitted = st.form_submit_button("Submit")
+            if submitted:
+                #st.write("question 1", q1)
+                st.session_state.oocsi.send('EngD_HAII', {
+                    'participant_ID': st.session_state.participantID,
+                    'type of explanation': 'visualmap',
+                    'q1': q1,
+                    'q2': q2,
+                    'q3': q3,
+                    'q4': q4,
+                    'q5': q5,
+                    'q6': q6,
+                    'q7': q7,
+                    'q8': q8,
+                    })
+                if (st.session_state.lastQuestion =='yes'):
+                    switch_page('finalPage')
+                else:
+                    st.session_state.profileIndex =st.session_state.profileIndices[0]
+                    switch_page(st.session_state.pages[st.session_state.nextPage4])
+    else:
+        if st.button('Continue to evaluation'):
+            st.session_state['user_active4']=True
+            st.experimental_rerun()

pages/6_finalpage.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import streamlit as st
+import streamlit.components.v1 as components
+from streamlit_extras.switch_page_button import switch_page
+from oocsi_source import OOCSI
+# Page layout: three rows, each split into three columns; content goes in the middle column
+header1, header2, header3 = st.columns([1,4,1])
+image1, image2, image3 = st.columns([1,50,1])
+body1, body2, body3 =st.columns([1,2,1])
+with header2:
+    st.title("Comparing the different methods")
+    st.markdown("This is the final section of this experiment, please rate and compare the different methods")
+with image2:
+    # Overview picture of the explanation methods
+    st.image('assets/images/overview methods.png')
+with body2:
+    # Final comparison questionnaire; values are only sent when the form is submitted
+    with st.form("my_form"):
+        st.write("As a final evaluation, please rate the different types of explanations (0-10). This is a general grade that you you would give to the different explanation methods.")
+        # One 0-10 grade per explanation method. Here `shap` is just the slider
+        # value; the visible imports of this page do not include the shap library.
+        shap = st.slider('SHAP', 0, 10)
+        dt = st.slider('Decision tree', 0, 10)
+        counterfactual  = st.slider("Counterfactual", 0, 10)
+        visualmap = st.slider("Visual map", 0, 10)
+        favourite = st.radio("What was your favourite type of epxlanation?", ('SHAP', 'Decision tree', 'Counterfactual', 'Visual map'))
+        why = st.text_area('Please explain why', "")
+        # Every form must have a submit button.
+        submitted = st.form_submit_button("Submit")
+        if submitted:
+            # Send the answers on the 'EngD_HAII_comparison' channel.
+            # NOTE(review): st.session_state.oocsi and participantID are not set
+            # in this page; presumably an earlier page creates them - confirm.
+            st.session_state.oocsi.send('EngD_HAII_comparison', {
+                    'participant_ID': st.session_state.participantID,
+                    'shap': shap,
+                    'decisiontree': dt,
+                    'counterfactual': counterfactual,
+                    'visualmap': visualmap,
+                    'favourite': favourite,
+                    'why': why
+                    })
+            st.balloons()
+            # Move on to the closing page
+            switch_page('thankyou')
+    # Execute your app
+    # embed streamlit docs in a streamlit app

pages/7_thankyou.py ADDED Viewed

	@@ -0,0 +1,13 @@

+import streamlit as st
+import streamlit.components.v1 as components
+from streamlit_extras.switch_page_button import switch_page
+from oocsi_source import OOCSI
+# Page layout: two rows of three columns; only the middle header column is
+# used in the visible code (body1-body3 are created but not written to).
+header1, header2, header3 = st.columns([1,2,1])
+body1, body2, body3 =st.columns([1,2,1])
+with header2:
+    # Closing message shown after the survey has been completed
+    st.balloons()
+    st.title("Thank you for completing this survey")
+    st.write("You can now close this tab")