Spaces:

nazneen
/

error-analysis

Runtime error

App Files Files Community

nazneen committed on May 21, 2022

Commit

8dd2bc6

1 Parent(s): 76e2fde

adding parquets

Browse files

Files changed (1) hide show

app.py +14 -18

app.py CHANGED Viewed

@@ -62,17 +62,17 @@ def down_samp(embedding):
 def data_comparison(df):
-    selection = alt.selection_multi(fields=['cluster','label'])
-    color = alt.condition(alt.datum.slice == 'high-loss', alt.Color('cluster:N', scale = alt.Scale(domain=df.cluster.unique().tolist())), alt.value("lightgray"))
     opacity = alt.condition(selection, alt.value(0.7), alt.value(0.25))
     # basic chart
     scatter = alt.Chart(df).mark_point(size=100, filled=True).encode(
-        x=alt.X('x', axis=None),
-        y=alt.Y('y', axis=None),
         color=color,
-        shape=alt.Shape('label', scale=alt.Scale(range=['circle', 'diamond'])),
-        tooltip=['cluster','slice','content','label','pred'],
         opacity=opacity
     ).properties(
         width=1000,
@@ -80,31 +80,26 @@ def data_comparison(df):
     ).interactive()
     legend = alt.Chart(df).mark_point(size=100, filled=True).encode(
-        x=alt.X("label"),
-        y=alt.Y('cluster:N', axis=alt.Axis(orient='right'), title=""),
-        shape=alt.Shape('label', scale=alt.Scale(
         range=['circle', 'diamond']), legend=None),
         color=color,
     ).add_selection(
         selection
     )
     layered = scatter | legend
     layered = layered.configure_axis(
         grid=False
     ).configure_view(
         strokeOpacity=0
     )
     return layered
 def quant_panel(embedding_df):
     """ Quantitative Panel Layout"""
     all_metrics = {}
     st.warning("**Error slice visualization**")
     with st.expander("How to read this chart:"):
         st.markdown("* Each **point** is an input example.")
         st.markdown("* Gray points have low-loss and the colored have high-loss. High-loss instances are clustered using **kmeans** and each color represents a cluster.")
@@ -210,12 +205,14 @@ def topic_distribution(weights, smoothing=0.01):
 if __name__ == "__main__":
     ### STREAMLIT APP CONGFIG ###
-    st.set_page_config(layout="wide", page_title="Error Analysis")
     ut.init_style()
     lcol, rcol = st.columns([2, 2])
     # ******* loading the mode and the data
     dataset = st.sidebar.selectbox(
         "Dataset",
         ["amazon_polarity", "yelp_polarity"],
@@ -246,8 +243,6 @@ if __name__ == "__main__":
         st.session_state["user_data"] = data_df
     if "selected_slice" not in st.session_state:
         st.session_state["selected_slice"] = None
-    if "embedding" not in st.session_state:
-        st.session_state["embedding"] = embedding_umap
     data_df['loss'] = data_df['loss'].astype(float)
     losses = data_df['loss']
@@ -258,13 +253,14 @@ if __name__ == "__main__":
     if run_kmeans == 'True':
         merged = kmeans(data_df,num_clusters=num_clusters)
     with lcol:
-        st.markdown('<h3>Error Slices (Subset of evaluation dataset the model performs poorly)</h3>',unsafe_allow_html=True)
         dataframe = merged[['content', 'label', 'pred', 'loss', 'cluster']].sort_values(
             by=['loss'], ascending=False)
         table_html = dataframe.to_html(
             columns=['content', 'label', 'pred', 'loss', 'cluster'], max_rows=50)
         # table_html = table_html.replace("<th>", '<th align="left">')  # left-align the headers
         with st.expander("How to read the table:"):
             st.markdown("* The table displays model error slices on the evaluation dataset, sorted by loss.")
             st.markdown("* Each row is an input example that includes the label, model pred, loss, and error cluster.")
         st.write(dataframe,width=900, height=300)

 def data_comparison(df):
+    selection = alt.selection_multi(fields=['cluster:O','label:O'])
+    color = alt.condition(alt.datum.slice == 'high-loss', alt.Color('cluster:O', scale = alt.Scale(domain=df.cluster.unique().tolist())), alt.value("lightgray"))
     opacity = alt.condition(selection, alt.value(0.7), alt.value(0.25))
     # basic chart
     scatter = alt.Chart(df).mark_point(size=100, filled=True).encode(
+        x=alt.X('x:Q', axis=None),
+        y=alt.Y('y:Q', axis=None),
         color=color,
+        shape=alt.Shape('label:O', scale=alt.Scale(range=['circle', 'diamond'])),
+        tooltip=['cluster:O','slice:N','content:N','label:O','pred:O'],
         opacity=opacity
     ).properties(
         width=1000,
     ).interactive()
     legend = alt.Chart(df).mark_point(size=100, filled=True).encode(
+        x=alt.X("label:O"),
+        y=alt.Y('cluster:O', axis=alt.Axis(orient='right'), title=""),
+        shape=alt.Shape('label:O', scale=alt.Scale(
         range=['circle', 'diamond']), legend=None),
         color=color,
     ).add_selection(
         selection
     )
     layered = scatter | legend
     layered = layered.configure_axis(
         grid=False
     ).configure_view(
         strokeOpacity=0
     )
     return layered
 def quant_panel(embedding_df):
     """ Quantitative Panel Layout"""
     all_metrics = {}
     st.warning("**Error slice visualization**")
     with st.expander("How to read this chart:"):
         st.markdown("* Each **point** is an input example.")
         st.markdown("* Gray points have low-loss and the colored have high-loss. High-loss instances are clustered using **kmeans** and each color represents a cluster.")
 if __name__ == "__main__":
     ### STREAMLIT APP CONGFIG ###
+    st.set_page_config(layout="wide", page_title="Interactive Error Analysis")
     ut.init_style()
     lcol, rcol = st.columns([2, 2])
     # ******* loading the mode and the data
+    st.sidebar.markdown("<h4>Interactive Error Analysis</h4>", unsafe_allow_html=True)
     dataset = st.sidebar.selectbox(
         "Dataset",
         ["amazon_polarity", "yelp_polarity"],
         st.session_state["user_data"] = data_df
     if "selected_slice" not in st.session_state:
         st.session_state["selected_slice"] = None
     data_df['loss'] = data_df['loss'].astype(float)
     losses = data_df['loss']
     if run_kmeans == 'True':
         merged = kmeans(data_df,num_clusters=num_clusters)
     with lcol:
+        st.markdown('<h3>Error Slices</h3>',unsafe_allow_html=True)
         dataframe = merged[['content', 'label', 'pred', 'loss', 'cluster']].sort_values(
             by=['loss'], ascending=False)
         table_html = dataframe.to_html(
             columns=['content', 'label', 'pred', 'loss', 'cluster'], max_rows=50)
         # table_html = table_html.replace("<th>", '<th align="left">')  # left-align the headers
         with st.expander("How to read the table:"):
+            st.markdown("* *Error slice* refers to the subset of evaluation dataset the model performs poorly on.")
             st.markdown("* The table displays model error slices on the evaluation dataset, sorted by loss.")
             st.markdown("* Each row is an input example that includes the label, model pred, loss, and error cluster.")
         st.write(dataframe,width=900, height=300)