giskard-evaluator

Running

App Files Files Community

200

weixuan-giskard committed on Dec 1, 2023

Commit

583defc

1 Parent(s): ea670d5

Attempt to match labels in model and in dataset

Browse files

Files changed (1) hide show

app.py +67 -5

app.py CHANGED Viewed

@@ -6,6 +6,10 @@ import os
 import time
 from pathlib import Path
 HF_REPO_ID = 'HF_REPO_ID'
 HF_SPACE_ID = 'SPACE_ID'
@@ -54,15 +58,41 @@ def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
     return dataset_id, dataset_config, dataset_split
 def try_validate(model_id, dataset_id, dataset_config, dataset_split):
     # Validate model
     m_id, ppl = check_model(model_id=model_id)
     if m_id is None:
         gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
-        return dataset_config, dataset_split, gr.update(interactive=False)
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load "{model_id} model": {ppl}')
-        return dataset_config, dataset_split, gr.update(interactive=False)
     # Validate dataset
     d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
@@ -80,15 +110,42 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split):
         dataset_ok = True
     if not dataset_ok:
-        return config, split, gr.update(interactive=False)
     # TODO: Validate column mapping by running once
     del ppl
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
-    return config, split, gr.update(interactive=True)
 def try_submit(m_id, d_id, config, split, local):
@@ -133,7 +190,7 @@ def try_submit(m_id, d_id, config, split, local):
         with open(output_dir / "report.html", "w") as f:
             print(f'Writing to {output_dir / "report.html"}')
             f.write(rendered_report)
         print(f"Finished local evaluation on {eval_str}: {time.time() - start:.2f}s")
@@ -155,6 +212,7 @@ with gr.Blocks(theme=theme) as iface:
                 value=0,
             )
             run_local = gr.Checkbox(value=True, label="Run in this Space")
         with gr.Column():
             dataset_id_input = gr.Textbox(
@@ -180,6 +238,8 @@ with gr.Blocks(theme=theme) as iface:
                 value="test",
             )
     with gr.Row():
         validate_btn = gr.Button("Validate model and dataset", variant="primary")
         run_btn = gr.Button(
@@ -199,6 +259,8 @@ with gr.Blocks(theme=theme) as iface:
                 dataset_config_input,
                 dataset_split_input,
                 run_btn,
             ],
         )
         run_btn.click(

 import time
 from pathlib import Path
+import pandas as pd
+from transformers.pipelines import TextClassificationPipeline
 HF_REPO_ID = 'HF_REPO_ID'
 HF_SPACE_ID = 'SPACE_ID'
     return dataset_id, dataset_config, dataset_split
def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
    """Find the model label that equals *label* when letter case is ignored.

    Args:
        id2label_mapping: Dict whose keys are the model's label strings.
        label: Dataset label string to look up.

    Returns:
        A ``(model_label, label)`` tuple for the first key whose upper-cased
        form equals the upper-cased *label*, or ``None`` when no key matches.
    """
    candidates = (
        (candidate, label)
        for candidate in id2label_mapping
        if candidate.upper() == label.upper()
    )
    # The first hit wins; an exhausted generator means nothing matched.
    return next(candidates, None)
def text_classification_map_model_and_dataset_labels(id2label, dataset_features):
    """Pair each model label with the dataset label it corresponds to.

    Only dataset features that are ``datasets.ClassLabel`` instances and that
    declare exactly as many names as the model has labels are considered.
    For those, a dataset label is matched to a model label first by exact
    equality and then case-insensitively.

    Args:
        id2label: Mapping from model class id to model label string.
        dataset_features: Mapping from dataset column name to feature object.

    Returns:
        Dict keyed by model label. The value is the matched dataset label, or
        ``None`` when no dataset label could be matched to that model label.
    """
    id2label_mapping = {model_label: None for model_label in id2label.values()}
    for feature in dataset_features.values():
        if not isinstance(feature, datasets.ClassLabel):
            continue
        if len(feature.names) != len(id2label_mapping):
            continue
        # Try to match labels
        for label in feature.names:
            if label in id2label_mapping:
                id2label_mapping[label] = label
                continue
            # Fall back to a case-insensitive lookup.
            match = text_classificaiton_match_label_case_unsensative(id2label_mapping, label)
            if match is None:
                # The helper yields None when nothing matches; unpacking it
                # would raise TypeError, so leave this label unmapped instead.
                continue
            model_label, _ = match
            id2label_mapping[model_label] = label
    return id2label_mapping
 def try_validate(model_id, dataset_id, dataset_config, dataset_split):
     # Validate model
     m_id, ppl = check_model(model_id=model_id)
     if m_id is None:
         gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
+        return dataset_config, dataset_split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
     if isinstance(ppl, Exception):
         gr.Warning(f'Failed to load "{model_id} model": {ppl}')
+        return dataset_config, dataset_split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
     # Validate dataset
     d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
         dataset_ok = True
     if not dataset_ok:
+        return config, split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
     # TODO: Validate column mapping by running once
+    prediction_result = {}
+    id2label_df = None
+    if isinstance(ppl, TextClassificationPipeline):
+        # Retrieve all labels
+        id2label_mapping = {}
+        try:
+            results = ppl({"text": "Test"}, top_k=None)
+            prediction_result = {
+                result["label"]: result["score"] for result in results
+            }
+        except Exception as e:
+            # Pipeline is not executable
+            pass
+        # We assume dataset is ok here
+        ds = datasets.load_dataset(d_id, config)[split]
+        try:
+            id2label = ppl.model.config.id2label
+            id2label_mapping = text_classification_map_model_and_dataset_labels(ppl.model.config.id2label, ds.features)
+            id2label_df = pd.DataFrame({
+                "ID": [i for i in id2label.keys()],
+                "Model labels": [id2label[label] for label in id2label.keys()],
+                "Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
+            })
+        except AttributeError:
+            # Dataset does not have features
+            pass
     del ppl
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
+    return config, split, gr.update(interactive=True), gr.update(value=prediction_result, visible=True), gr.update(value=id2label_df, visible=True)
 def try_submit(m_id, d_id, config, split, local):
         with open(output_dir / "report.html", "w") as f:
             print(f'Writing to {output_dir / "report.html"}')
             f.write(rendered_report)
         print(f"Finished local evaluation on {eval_str}: {time.time() - start:.2f}s")
                 value=0,
             )
             run_local = gr.Checkbox(value=True, label="Run in this Space")
+            example_labels = gr.Label(label='Model pipeline test prediction result', visible=False)
         with gr.Column():
             dataset_id_input = gr.Textbox(
                 value="test",
             )
+            id2label_mapping_dataframe = gr.DataFrame(visible=False)
     with gr.Row():
         validate_btn = gr.Button("Validate model and dataset", variant="primary")
         run_btn = gr.Button(
                 dataset_config_input,
                 dataset_split_input,
                 run_btn,
+                example_labels,
+                id2label_mapping_dataframe,
             ],
         )
         run_btn.click(