Commit fb9cb6e · Alexander Seifert committed
Parent(s): 17ba05a
update

Changed files:
- main.py +4 -17
- subpages/__init__.py +1 -1
- subpages/{embeddings.py → hidden_states.py} +2 -2
- subpages/home.py +5 -5
- utils.py +13 -12
main.py
CHANGED
@@ -17,8 +17,9 @@ from subpages import (
     RawDataPage,
 )
 from subpages.attention import AttentionPage
-from subpages.embeddings import EmbeddingsPage
+from subpages.hidden_states import HiddenStatesPage
 from subpages.inspect import InspectPage
+from utils import classmap
 
 sts = st.sidebar
 st.set_page_config(
@@ -54,25 +55,11 @@ def _write_color_legend(context):
     def style(x):
         return [f"background-color: {rgb}; opacity: 1;" for rgb in colors]
 
-    labelmap = {
-        "O": "O",
-        "person": "π",
-        "PER": "π",
-        "location": "π",
-        "LOC": "π",
-        "corporation": "π€",
-        "ORG": "π€",
-        "product": "π±",
-        "creative": "π·",
-        "group": "π·",
-        "MISC": "π·",
-    }
-
     labels = list(set([lbl.split("-")[1] if "-" in lbl else lbl for lbl in context.labels]))
     colors = [st.session_state.get(f"color_{lbl}", "#000000") for lbl in labels]
 
     color_legend_df = pd.DataFrame(
-        [labelmap[l] for l in labels], columns=["label"], index=labels
+        [classmap[l] for l in labels], columns=["label"], index=labels
     ).T
     st.sidebar.write(
         color_legend_df.T.style.apply(style, axis=0).set_properties(
@@ -85,7 +72,7 @@ def main():
     pages: list[Page] = [
         HomePage(),
         AttentionPage(),
-        EmbeddingsPage(),
+        HiddenStatesPage(),
         ProbingPage(),
         MetricsPage(),
         MisclassifiedPage(),
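The main change in main.py is that _write_color_legend no longer builds its own labelmap and instead reads the shared classmap from utils. Below is a minimal sketch of the resulting legend construction, using pandas only; the labels, icons, and colors are hard-coded stand-ins for what the real app pulls from utils.classmap and st.session_state.

import pandas as pd

classmap = {"O": "O", "PER": "PER-icon", "LOC": "LOC-icon"}  # stand-in for utils.classmap
labels = ["O", "PER", "LOC"]
colors = ["#000000", "#ff0000", "#00ff00"]  # stand-in for the per-label colors kept in st.session_state

# one-row frame: a single "label" row holding each label's icon, one column per label
color_legend_df = pd.DataFrame(
    [classmap[l] for l in labels], columns=["label"], index=labels
).T

def style(x):
    # one background color per label, mirroring Styler.apply(style, axis=0) in the app
    return [f"background-color: {rgb}; opacity: 1;" for rgb in colors]

styled = color_legend_df.T.style.apply(style, axis=0)  # the app passes this Styler to st.sidebar.write
html = styled.to_html()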
subpages/__init__.py
CHANGED
@@ -1,7 +1,7 @@
 from subpages.attention import AttentionPage
 from subpages.debug import DebugPage
-from subpages.embeddings import EmbeddingsPage
 from subpages.find_duplicates import FindDuplicatesPage
+from subpages.hidden_states import HiddenStatesPage
 from subpages.home import HomePage
 from subpages.inspect import InspectPage
 from subpages.losses import LossesPage
subpages/{embeddings.py → hidden_states.py}
RENAMED
@@ -28,8 +28,8 @@ def reduce_dim_umap(X, n_neighbors=5, min_dist=0.1, metric="euclidean"):
     return UMAP(n_neighbors=n_neighbors, min_dist=min_dist, metric=metric).fit_transform(X)
 
 
-class EmbeddingsPage(Page):
-    name = "Embeddings"
+class HiddenStatesPage(Page):
+    name = "Hidden States"
     icon = "grid-3x3"
 
     def get_widget_defaults(self):
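The rename only touches the page class; the projection code above it is unchanged. For context, here is a hedged sketch of the UMAP reduction the page relies on, assuming the umap-learn package is installed; the random X below stands in for per-token hidden states.

import numpy as np
from umap import UMAP

X = np.random.rand(200, 768)  # dummy hidden states: 200 tokens, 768 dimensions

def reduce_dim_umap(X, n_neighbors=5, min_dist=0.1, metric="euclidean"):
    # project the high-dimensional states down to 2D for plotting
    return UMAP(n_neighbors=n_neighbors, min_dist=min_dist, metric=metric).fit_transform(X)

coords_2d = reduce_dim_umap(X)  # (200, 2) array ready for a scatter plot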
subpages/home.py
CHANGED
@@ -3,11 +3,10 @@ import random
 from typing import Optional
 
 import streamlit as st
-from pandas import wide_to_long
 
 from data import get_data
 from subpages.page import Context, Page
-from utils import color_map_color
+from utils import classmap, color_map_color
 
 _SENTENCE_ENCODER_MODEL = (
     "sentence-transformers/all-MiniLM-L6-v2",
@@ -53,7 +52,7 @@ class HomePage(Page):
 
         with st.expander("π‘", expanded=True):
             st.write(
-                "**Error Analysis is an important but often overlooked part of the data science project lifecycle**, for which there is still very little tooling available. Practitioners tend to write throwaway code or, worse, skip this crucial step of understanding their models' errors altogether. This project tries to provide an **extensive toolkit to probe any NER model/dataset combination**, find labeling errors and understand the models' and datasets' limitations, leading the user on her way to further
+                "**Error Analysis is an important but often overlooked part of the data science project lifecycle**, for which there is still very little tooling available. Practitioners tend to write throwaway code or, worse, skip this crucial step of understanding their models' errors altogether. This project tries to provide an **extensive toolkit to probe any NER model/dataset combination**, find labeling errors and understand the models' and datasets' limitations, leading the user on her way to further **improving both model AND dataset**."
             )
 
         col1, _, col2a, col2b = st.columns([1, 0.05, 0.15, 0.15])
@@ -91,7 +90,7 @@ class HomePage(Page):
             st.text_input(
                 label="Encoder Model:",
                 key="encoder_model_name",
-                help="Path or name of the encoder to use",
+                help="Path or name of the encoder to use for duplicate detection",
             )
             ds_name = st.text_input(
                 label="Dataset:",
@@ -136,8 +135,9 @@ class HomePage(Page):
         emojis = list(json.load(open("subpages/emoji-en-US.json")).keys())
         for label in labels:
             if f"icon_{label}" not in st.session_state:
-                st.session_state[f"icon_{label}"] =
+                st.session_state[f"icon_{label}"] = classmap[label]
             st.selectbox(label, key=f"icon_{label}", options=emojis)
+            classmap[label] = st.session_state[f"icon_{label}"]
 
         # if st.button("Reset to defaults"):
         #     st.session_state.update(**get_home_page_defaults())
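home.py now seeds each icon picker from the shared classmap and writes the user's selection back into it. Here is a self-contained sketch of that Streamlit pattern; the trimmed-down classmap and emoji list are stand-ins for the map in utils.py and the contents of subpages/emoji-en-US.json.

import streamlit as st

classmap = {"PER": "🙂", "LOC": "🌍", "ORG": "🏠"}  # stand-in for utils.classmap
emojis = ["🙂", "🌍", "🏠", "📦"]  # the app loads these from subpages/emoji-en-US.json
labels = list(classmap)

for label in labels:
    # seed the widget's default from classmap only on the first run of the session
    if f"icon_{label}" not in st.session_state:
        st.session_state[f"icon_{label}"] = classmap[label]
    st.selectbox(label, options=emojis, key=f"icon_{label}")
    # write the current selection back so every page renders the same icon
    classmap[label] = st.session_state[f"icon_{label}"]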
utils.py
CHANGED
@@ -14,6 +14,19 @@ tokenizer_hash_funcs = {
 # device = torch.device("cuda" if torch.cuda.is_available() else "cpu" if torch.has_mps else "cpu")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+classmap = {
+    "O": "O",
+    "PER": "π",
+    "person": "π",
+    "LOC": "π",
+    "location": "π",
+    "ORG": "π€",
+    "corporation": "π€",
+    "product": "π±",
+    "creative": "π·",
+    "MISC": "π·",
+}
+
 
 def aggrid_interactive_table(df: pd.DataFrame) -> dict:
     """Creates an st-aggrid interactive table based on a dataframe.
@@ -159,18 +172,6 @@ def colorize_classes(df: pd.DataFrame) -> pd.DataFrame:
 
 def htmlify_labeled_example(example: pd.DataFrame) -> str:
     html = []
-    classmap = {
-        "O": "O",
-        "PER": "π",
-        "person": "π",
-        "LOC": "π",
-        "location": "π",
-        "ORG": "π€",
-        "corporation": "π€",
-        "product": "π±",
-        "creative": "π·",
-        "MISC": "π·",
-    }
 
     for _, row in example.iterrows():
         pred = row.preds.split("-")[1] if "-" in row.preds else "O"
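Hoisting classmap to module level lets htmlify_labeled_example (and the other pages) share one label-to-icon mapping. A rough usage sketch follows, with placeholder icons and a tiny DataFrame whose tokens/preds columns mirror the app's layout.

import pandas as pd

classmap = {"O": "O", "PER": "🙂", "LOC": "🌍"}  # placeholder icons for the shared map

example = pd.DataFrame(
    {"tokens": ["Alice", "visited", "Paris"], "preds": ["B-PER", "O", "B-LOC"]}
)

html = []
for _, row in example.iterrows():
    # strip the BIO prefix exactly as htmlify_labeled_example does
    pred = row.preds.split("-")[1] if "-" in row.preds else "O"
    html.append(f"<span title='{pred}'>{row.tokens} {classmap[pred]}</span>")

print(" ".join(html))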