README.md CHANGED
@@ -4,16 +4,9 @@ emoji: πŸ’πŸ”
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.16.0
8
  app_file: app.py
9
  pinned: false
10
-
11
- hf_oauth: true
12
- # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
13
- hf_oauth_expiration_minutes: 480
14
- # optional, see "Scopes" below. "openid profile" is always included.
15
- hf_oauth_scopes:
16
- - inference-api
17
  ---
18
 
19
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 4.7.1
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
5
  from app_debug import get_demo as get_demo_debug
6
  from app_leaderboard import get_demo as get_demo_leaderboard
7
  from app_text_classification import get_demo as get_demo_text_classification
8
- from run_jobs import start_process_run_job, stop_thread
9
 
10
  try:
11
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
 
5
  from app_debug import get_demo as get_demo_debug
6
  from app_leaderboard import get_demo as get_demo_leaderboard
7
  from app_text_classification import get_demo as get_demo_text_classification
8
+ from utils.run_jobs import start_process_run_job, stop_thread
9
 
10
  try:
11
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
app_debug.py CHANGED
@@ -3,12 +3,12 @@ from os.path import isfile, join
3
  import html
4
 
5
  import gradio as gr
6
- import os
7
- import pipe
8
- from io_utils import get_logs_file
9
 
10
  LOG_PATH = "./tmp"
11
- CONFIG_PATH = "./cicd/configs/submitted/"
12
  MAX_FILES_NUM = 20
13
 
14
 
@@ -69,19 +69,17 @@ def get_queue_status():
69
 
70
 
71
  def get_demo():
72
- if not os.path.exists(CONFIG_PATH):
73
- os.makedirs(CONFIG_PATH)
74
  with gr.Row():
75
  gr.HTML(
76
  value=get_queue_status,
77
  every=5,
78
  )
79
- with gr.Accordion(label="Log Files", open=True):
 
 
80
  with gr.Row():
81
  gr.Textbox(
82
  value=get_logs_file, every=0.5, lines=10, visible=True, label="Current Log File"
83
  )
84
- with gr.Row():
85
- gr.Files(value=get_log_files, label="Log Files", every=10)
86
  with gr.Accordion(label="Config Files", open=False):
87
  gr.Files(value=get_config_files, label="Config Files", every=10)
 
3
  import html
4
 
5
  import gradio as gr
6
+
7
+ import utils.pipe as pipe
8
+ from utils.io_utils import get_logs_file
9
 
10
  LOG_PATH = "./tmp"
11
+ CONFIG_PATH = "./cicd/configs/"
12
  MAX_FILES_NUM = 20
13
 
14
 
 
69
 
70
 
71
  def get_demo():
 
 
72
  with gr.Row():
73
  gr.HTML(
74
  value=get_queue_status,
75
  every=5,
76
  )
77
+ with gr.Accordion(label="Log Files", open=False):
78
+ with gr.Row():
79
+ gr.Files(value=get_log_files, label="Log Files", every=10)
80
  with gr.Row():
81
  gr.Textbox(
82
  value=get_logs_file, every=0.5, lines=10, visible=True, label="Current Log File"
83
  )
 
 
84
  with gr.Accordion(label="Config Files", open=False):
85
  gr.Files(value=get_config_files, label="Config Files", every=10)
app_leaderboard.py CHANGED
@@ -5,10 +5,10 @@ import gradio as gr
5
  import pandas as pd
6
  import datetime
7
 
8
- from fetch_utils import (check_dataset_and_get_config,
9
  check_dataset_and_get_split)
10
 
11
- import leaderboard
12
  logger = logging.getLogger(__name__)
13
  global update_time
14
  update_time = datetime.datetime.fromtimestamp(0)
@@ -88,29 +88,11 @@ def get_demo(leaderboard_tab):
88
  dataset_ids = get_dataset_ids(records)
89
 
90
  column_names = records.columns.tolist()
91
- issue_columns = column_names[:11]
92
- info_columns = column_names[15:]
93
  default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
94
  default_df = records[default_columns] # extract columns selected
95
  types = get_types(default_df)
96
  display_df = get_display_df(default_df) # the styled dataframe to display
97
 
98
- with gr.Row():
99
- with gr.Column():
100
- issue_columns_select = gr.CheckboxGroup(
101
- label="Issue Columns",
102
- choices=issue_columns,
103
- value=[],
104
- interactive=True,
105
- )
106
- with gr.Column():
107
- info_columns_select = gr.CheckboxGroup(
108
- label="Info Columns",
109
- choices=info_columns,
110
- value=default_columns,
111
- interactive=True,
112
- )
113
-
114
  with gr.Row():
115
  task_select = gr.Dropdown(
116
  label="Task",
@@ -128,35 +110,42 @@ def get_demo(leaderboard_tab):
128
  interactive=True,
129
  )
130
 
 
 
 
 
 
 
 
 
131
  with gr.Row():
132
  leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
133
 
134
- def update_leaderboard_records(model_id, dataset_id, issue_columns, info_columns, task):
135
  global update_time
136
  if datetime.datetime.now() - update_time < datetime.timedelta(minutes=10):
137
  return gr.update()
138
  update_time = datetime.datetime.now()
139
  logger.info("Updating leaderboard records")
140
  leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
141
- return filter_table(model_id, dataset_id, issue_columns, info_columns, task)
142
 
143
  leaderboard_tab.select(
144
  fn=update_leaderboard_records,
145
- inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
146
  outputs=[leaderboard_df])
147
 
148
  @gr.on(
149
  triggers=[
150
  model_select.change,
151
  dataset_select.change,
152
- issue_columns_select.change,
153
- info_columns_select.change,
154
  task_select.change,
155
  ],
156
- inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
157
  outputs=[leaderboard_df],
158
  )
159
- def filter_table(model_id, dataset_id, issue_columns, info_columns, task):
160
  logger.info("Filtering leaderboard records")
161
  records = leaderboard.records
162
  # filter the table based on task
@@ -167,9 +156,8 @@ def get_demo(leaderboard_tab):
167
  if dataset_id and dataset_id != "Any":
168
  df = df[(df["dataset_id"] == dataset_id)]
169
 
170
- # filter the table based on the columns
171
- issue_columns.sort()
172
- df = df[info_columns + issue_columns]
173
  types = get_types(df)
174
  display_df = get_display_df(df)
175
  return gr.update(value=display_df, datatype=types, interactive=False)
 
5
  import pandas as pd
6
  import datetime
7
 
8
+ from utils.fetch_utils import (check_dataset_and_get_config,
9
  check_dataset_and_get_split)
10
 
11
+ import utils.leaderboard as leaderboard
12
  logger = logging.getLogger(__name__)
13
  global update_time
14
  update_time = datetime.datetime.fromtimestamp(0)
 
88
  dataset_ids = get_dataset_ids(records)
89
 
90
  column_names = records.columns.tolist()
 
 
91
  default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
92
  default_df = records[default_columns] # extract columns selected
93
  types = get_types(default_df)
94
  display_df = get_display_df(default_df) # the styled dataframe to display
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  with gr.Row():
97
  task_select = gr.Dropdown(
98
  label="Task",
 
110
  interactive=True,
111
  )
112
 
113
+ with gr.Row():
114
+ columns_select = gr.CheckboxGroup(
115
+ label="Show columns",
116
+ choices=column_names,
117
+ value=default_columns,
118
+ interactive=True,
119
+ )
120
+
121
  with gr.Row():
122
  leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
123
 
124
+ def update_leaderboard_records(model_id, dataset_id, columns, task):
125
  global update_time
126
  if datetime.datetime.now() - update_time < datetime.timedelta(minutes=10):
127
  return gr.update()
128
  update_time = datetime.datetime.now()
129
  logger.info("Updating leaderboard records")
130
  leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
131
+ return filter_table(model_id, dataset_id, columns, task)
132
 
133
  leaderboard_tab.select(
134
  fn=update_leaderboard_records,
135
+ inputs=[model_select, dataset_select, columns_select, task_select],
136
  outputs=[leaderboard_df])
137
 
138
  @gr.on(
139
  triggers=[
140
  model_select.change,
141
  dataset_select.change,
142
+ columns_select.change,
 
143
  task_select.change,
144
  ],
145
+ inputs=[model_select, dataset_select, columns_select, task_select],
146
  outputs=[leaderboard_df],
147
  )
148
+ def filter_table(model_id, dataset_id, columns, task):
149
  logger.info("Filtering leaderboard records")
150
  records = leaderboard.records
151
  # filter the table based on task
 
156
  if dataset_id and dataset_id != "Any":
157
  df = df[(df["dataset_id"] == dataset_id)]
158
 
159
+ # filter the table based on the columns
160
+ df = df[columns]
 
161
  types = get_types(df)
162
  display_df = get_display_df(df)
163
  return gr.update(value=display_df, datatype=types, interactive=False)
app_legacy.py CHANGED
@@ -376,7 +376,7 @@ def get_demo():
376
  selected = read_scanners("./config.yaml")
377
  scan_config = selected + ["data_leakage"]
378
  scanners = gr.CheckboxGroup(
379
- choices=scan_config, value=selected, visible=True
380
  )
381
 
382
  with gr.Row():
 
376
  selected = read_scanners("./config.yaml")
377
  scan_config = selected + ["data_leakage"]
378
  scanners = gr.CheckboxGroup(
379
+ choices=scan_config, value=selected, label="Scan Settings", visible=True
380
  )
381
 
382
  with gr.Row():
app_text_classification.py CHANGED
@@ -2,12 +2,12 @@ import uuid
2
 
3
  import gradio as gr
4
 
5
- from io_utils import read_scanners, write_scanners
6
- from text_classification_ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
9
- get_dataset_splits,
10
  check_dataset,
 
11
  precheck_model_ds_enable_example_btn,
12
  try_submit,
13
  empty_column_mapping,
@@ -16,11 +16,12 @@ from text_classification_ui_helpers import (
16
  )
17
 
18
  import logging
19
- from wordings import (
20
  CONFIRM_MAPPING_DETAILS_MD,
21
  INTRODUCTION_MD,
22
- LOG_IN_TIPS,
23
  CHECK_LOG_SECTION_RAW,
 
24
  )
25
 
26
  MAX_LABELS = 40
@@ -33,16 +34,9 @@ logger = logging.getLogger(__name__)
33
  def get_demo():
34
  with gr.Row():
35
  gr.Markdown(INTRODUCTION_MD)
36
-
37
- with gr.Row(visible=False):
38
  uid_label = gr.Textbox(
39
  label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
40
  )
41
-
42
- with gr.Accordion(label="Log In", open=True):
43
- gr.HTML(LOG_IN_TIPS)
44
- gr.LoginButton()
45
-
46
  with gr.Row():
47
  model_id_input = gr.Textbox(
48
  label="Hugging Face Model id",
@@ -64,7 +58,7 @@ def get_demo():
64
  with gr.Row():
65
  first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
66
  with gr.Row():
67
- loading_dataset_info = gr.HTML(visible=True)
68
  with gr.Row():
69
  example_btn = gr.Button(
70
  "Validate Model & Dataset",
@@ -72,13 +66,11 @@ def get_demo():
72
  variant="primary",
73
  interactive=False,
74
  )
 
75
  with gr.Row():
76
- loading_validation = gr.HTML(visible=True)
77
- with gr.Row():
78
- validation_result = gr.HTML(visible=False)
79
  with gr.Row():
80
- example_input = gr.Textbox(label="Example Input", visible=False, interactive=False)
81
- example_prediction = gr.Label(label="Model Sample Prediction", visible=False)
82
 
83
  with gr.Row():
84
  with gr.Accordion(
@@ -97,8 +89,27 @@ def get_demo():
97
  for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
98
  column_mappings.append(gr.Dropdown(visible=False))
99
 
100
- with gr.Accordion(label="Scanner Advanced Config (optional)", open=False):
101
- scanners = gr.CheckboxGroup(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
104
  def get_scanners(uid):
@@ -106,16 +117,7 @@ def get_demo():
106
  # we remove data_leakage from the default scanners
107
  # Reason: data_leakage barely raises any issues and takes too many requests
108
  # when using inference API, causing rate limit error
109
- scan_config = [
110
- "ethical_bias",
111
- "text_perturbation",
112
- "robustness",
113
- "performance",
114
- "underconfidence",
115
- "overconfidence",
116
- "spurious_correlation",
117
- "data_leakage",
118
- ]
119
  return gr.update(
120
  choices=scan_config, value=selected, label="Scan Settings", visible=True
121
  )
@@ -145,20 +147,18 @@ def get_demo():
145
  inputs=[model_id_input],
146
  outputs=[dataset_id_input],
147
  ).then(
148
- fn=check_dataset,
149
- inputs=[dataset_id_input],
150
- outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
151
  )
152
 
153
  gr.on(
154
- triggers=[dataset_id_input.input, dataset_id_input.select],
155
  fn=check_dataset,
156
  inputs=[dataset_id_input],
157
- outputs=[dataset_config_input, dataset_split_input, loading_dataset_info]
158
  )
159
 
160
- dataset_config_input.change(fn=get_dataset_splits, inputs=[dataset_id_input, dataset_config_input], outputs=[dataset_split_input])
161
-
162
  gr.on(
163
  triggers=[model_id_input.change, dataset_id_input.change, dataset_config_input.change],
164
  fn=empty_column_mapping,
@@ -187,7 +187,6 @@ def get_demo():
187
  gr.on(
188
  triggers=[
189
  model_id_input.change,
190
- model_id_input.input,
191
  dataset_id_input.change,
192
  dataset_config_input.change,
193
  dataset_split_input.change,
@@ -199,13 +198,7 @@ def get_demo():
199
  dataset_config_input,
200
  dataset_split_input,
201
  ],
202
- outputs=[
203
- example_btn,
204
- first_line_ds,
205
- validation_result,
206
- example_input,
207
- example_prediction,
208
- column_mapping_accordion,],
209
  )
210
 
211
  gr.on(
@@ -219,14 +212,15 @@ def get_demo():
219
  dataset_config_input,
220
  dataset_split_input,
221
  uid_label,
 
 
222
  ],
223
  outputs=[
224
- validation_result,
225
  example_input,
226
  example_prediction,
227
  column_mapping_accordion,
228
  run_btn,
229
- loading_validation,
230
  *column_mappings,
231
  ],
232
  )
@@ -241,26 +235,24 @@ def get_demo():
241
  dataset_id_input,
242
  dataset_config_input,
243
  dataset_split_input,
 
 
244
  uid_label,
245
  ],
246
- outputs=[
247
- run_btn,
248
- logs,
249
- uid_label,
250
- validation_result,
251
- example_input,
252
- example_prediction,
253
- column_mapping_accordion,
254
- ],
255
  )
256
 
257
  gr.on(
258
  triggers=[
 
 
259
  scanners.input,
260
  ],
261
  fn=enable_run_btn,
262
  inputs=[
263
  uid_label,
 
 
264
  model_id_input,
265
  dataset_id_input,
266
  dataset_config_input,
@@ -274,6 +266,8 @@ def get_demo():
274
  fn=enable_run_btn,
275
  inputs=[
276
  uid_label,
 
 
277
  model_id_input,
278
  dataset_id_input,
279
  dataset_config_input,
 
2
 
3
  import gradio as gr
4
 
5
+ from utils.io_utils import read_scanners, write_scanners
6
+ from utils.ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
 
9
  check_dataset,
10
+ show_hf_token_info,
11
  precheck_model_ds_enable_example_btn,
12
  try_submit,
13
  empty_column_mapping,
 
16
  )
17
 
18
  import logging
19
+ from utils.wordings import (
20
  CONFIRM_MAPPING_DETAILS_MD,
21
  INTRODUCTION_MD,
22
+ USE_INFERENCE_API_TIP,
23
  CHECK_LOG_SECTION_RAW,
24
+ HF_TOKEN_INVALID_STYLED
25
  )
26
 
27
  MAX_LABELS = 40
 
34
  def get_demo():
35
  with gr.Row():
36
  gr.Markdown(INTRODUCTION_MD)
 
 
37
  uid_label = gr.Textbox(
38
  label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
39
  )
 
 
 
 
 
40
  with gr.Row():
41
  model_id_input = gr.Textbox(
42
  label="Hugging Face Model id",
 
58
  with gr.Row():
59
  first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
60
  with gr.Row():
61
+ loading_status = gr.HTML(visible=True)
62
  with gr.Row():
63
  example_btn = gr.Button(
64
  "Validate Model & Dataset",
 
66
  variant="primary",
67
  interactive=False,
68
  )
69
+
70
  with gr.Row():
71
+ example_input = gr.HTML(visible=False)
 
 
72
  with gr.Row():
73
+ example_prediction = gr.Label(label="Model Prediction Sample", visible=False)
 
74
 
75
  with gr.Row():
76
  with gr.Accordion(
 
89
  for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
90
  column_mappings.append(gr.Dropdown(visible=False))
91
 
92
+ with gr.Accordion(label="Model Wrap Advance Config", open=True):
93
+ gr.HTML(USE_INFERENCE_API_TIP)
94
+
95
+ run_inference = gr.Checkbox(value=True, label="Run with Inference API")
96
+ inference_token = gr.Textbox(
97
+ placeholder="hf-xxxxxxxxxxxxxxxxxxxx",
98
+ value="",
99
+ label="HF Token for Inference API",
100
+ visible=True,
101
+ interactive=True,
102
+ )
103
+ inference_token_info = gr.HTML(value=HF_TOKEN_INVALID_STYLED, visible=False)
104
+
105
+ inference_token.change(
106
+ fn=show_hf_token_info,
107
+ inputs=[inference_token],
108
+ outputs=[inference_token_info],
109
+ )
110
+
111
+ with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
112
+ scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
113
 
114
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
115
  def get_scanners(uid):
 
117
  # we remove data_leakage from the default scanners
118
  # Reason: data_leakage barely raises any issues and takes too many requests
119
  # when using inference API, causing rate limit error
120
+ scan_config = selected + ["data_leakage"]
 
 
 
 
 
 
 
 
 
121
  return gr.update(
122
  choices=scan_config, value=selected, label="Scan Settings", visible=True
123
  )
 
147
  inputs=[model_id_input],
148
  outputs=[dataset_id_input],
149
  ).then(
150
+ fn=check_dataset,
151
+ inputs=[dataset_id_input],
152
+ outputs=[dataset_config_input, dataset_split_input, loading_status]
153
  )
154
 
155
  gr.on(
156
+ triggers=[dataset_id_input.change],
157
  fn=check_dataset,
158
  inputs=[dataset_id_input],
159
+ outputs=[dataset_config_input, dataset_split_input, loading_status]
160
  )
161
 
 
 
162
  gr.on(
163
  triggers=[model_id_input.change, dataset_id_input.change, dataset_config_input.change],
164
  fn=empty_column_mapping,
 
187
  gr.on(
188
  triggers=[
189
  model_id_input.change,
 
190
  dataset_id_input.change,
191
  dataset_config_input.change,
192
  dataset_split_input.change,
 
198
  dataset_config_input,
199
  dataset_split_input,
200
  ],
201
+ outputs=[example_btn, first_line_ds, loading_status],
 
 
 
 
 
 
202
  )
203
 
204
  gr.on(
 
212
  dataset_config_input,
213
  dataset_split_input,
214
  uid_label,
215
+ run_inference,
216
+ inference_token,
217
  ],
218
  outputs=[
 
219
  example_input,
220
  example_prediction,
221
  column_mapping_accordion,
222
  run_btn,
223
+ loading_status,
224
  *column_mappings,
225
  ],
226
  )
 
235
  dataset_id_input,
236
  dataset_config_input,
237
  dataset_split_input,
238
+ run_inference,
239
+ inference_token,
240
  uid_label,
241
  ],
242
+ outputs=[run_btn, logs, uid_label],
 
 
 
 
 
 
 
 
243
  )
244
 
245
  gr.on(
246
  triggers=[
247
+ run_inference.input,
248
+ inference_token.input,
249
  scanners.input,
250
  ],
251
  fn=enable_run_btn,
252
  inputs=[
253
  uid_label,
254
+ run_inference,
255
+ inference_token,
256
  model_id_input,
257
  dataset_id_input,
258
  dataset_config_input,
 
266
  fn=enable_run_btn,
267
  inputs=[
268
  uid_label,
269
+ run_inference,
270
+ inference_token,
271
  model_id_input,
272
  dataset_id_input,
273
  dataset_config_input,
fetch_utils.py DELETED
@@ -1,32 +0,0 @@
1
- import logging
2
-
3
- import datasets
4
-
5
-
6
- def check_dataset_and_get_config(dataset_id):
7
- try:
8
- configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
9
- return configs
10
- except Exception:
11
- # Dataset may not exist
12
- return None
13
-
14
-
15
- def check_dataset_and_get_split(dataset_id, dataset_config):
16
- try:
17
- ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
18
- except Exception as e:
19
- # Dataset may not exist
20
- logging.warning(
21
- f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
22
- )
23
- return None
24
- try:
25
- splits = list(ds.keys())
26
- return splits
27
- except Exception as e:
28
- # Dataset has no splits
29
- logging.warning(
30
- f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}"
31
- )
32
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
io_utils.py DELETED
@@ -1,134 +0,0 @@
1
- import os
2
- import logging
3
- import yaml
4
-
5
- YAML_PATH = "./cicd/configs"
6
- LOG_FILE = "temp_log"
7
-
8
- logger = logging.getLogger(__name__)
9
-
10
- class Dumper(yaml.Dumper):
11
- def increase_indent(self, flow=False, *args, **kwargs):
12
- return super().increase_indent(flow=flow, indentless=False)
13
-
14
- def get_submitted_yaml_path(uid):
15
- if not os.path.exists(f"{YAML_PATH}/submitted"):
16
- os.makedirs(f"{YAML_PATH}/submitted")
17
- if not os.path.exists(f"{YAML_PATH}/{uid}_config.yaml"):
18
- logger.error(f"config.yaml does not exist for {uid}")
19
- os.system(f"cp config.yaml {YAML_PATH}/{uid}_config.yaml")
20
- if not os.path.exists(f"{YAML_PATH}/submitted/{uid}_config.yaml"):
21
- os.system(f"cp {YAML_PATH}/{uid}_config.yaml {YAML_PATH}/submitted/{uid}_config.yaml")
22
- return f"{YAML_PATH}/submitted/{uid}_config.yaml"
23
-
24
- def get_yaml_path(uid):
25
- if not os.path.exists(YAML_PATH):
26
- os.makedirs(YAML_PATH)
27
- if not os.path.exists(f"{YAML_PATH}/{uid}_config.yaml"):
28
- os.system(f"cp config.yaml {YAML_PATH}/{uid}_config.yaml")
29
- return f"{YAML_PATH}/{uid}_config.yaml"
30
-
31
-
32
- # read scanners from yaml file
33
- # return a list of scanners
34
- def read_scanners(uid):
35
- scanners = []
36
- with open(get_yaml_path(uid), "r") as f:
37
- config = yaml.load(f, Loader=yaml.FullLoader)
38
- scanners = config.get("detectors", [])
39
- return scanners
40
-
41
-
42
- # convert a list of scanners to yaml file
43
- def write_scanners(scanners, uid):
44
- with open(get_yaml_path(uid), "r") as f:
45
- config = yaml.load(f, Loader=yaml.FullLoader)
46
- if config:
47
- config["detectors"] = scanners
48
- # save scanners to detectors in yaml
49
- with open(get_yaml_path(uid), "w") as f:
50
- yaml.dump(config, f, Dumper=Dumper)
51
-
52
-
53
- # read model_type from yaml file
54
- def read_inference_type(uid):
55
- inference_type = ""
56
- with open(get_yaml_path(uid), "r") as f:
57
- config = yaml.load(f, Loader=yaml.FullLoader)
58
- inference_type = config.get("inference_type", "")
59
- return inference_type
60
-
61
-
62
- # write model_type to yaml file
63
- def write_inference_type(use_inference, inference_token, uid):
64
- with open(get_yaml_path(uid), "r") as f:
65
- config = yaml.load(f, Loader=yaml.FullLoader)
66
- if use_inference:
67
- config["inference_type"] = "hf_inference_api"
68
- config["inference_token"] = inference_token
69
- else:
70
- config["inference_type"] = "hf_pipeline"
71
- # FIXME: A quick and temp fix for missing token
72
- config["inference_token"] = ""
73
- # save inference_type to inference_type in yaml
74
- with open(get_yaml_path(uid), "w") as f:
75
- yaml.dump(config, f, Dumper=Dumper)
76
-
77
-
78
- # read column mapping from yaml file
79
- def read_column_mapping(uid):
80
- column_mapping = {}
81
- with open(get_yaml_path(uid), "r") as f:
82
- config = yaml.load(f, Loader=yaml.FullLoader)
83
- if config:
84
- column_mapping = config.get("column_mapping", dict())
85
- if column_mapping is None:
86
- column_mapping = {}
87
- return column_mapping
88
-
89
-
90
- # write column mapping to yaml file
91
- def write_column_mapping(mapping, uid):
92
- with open(get_yaml_path(uid), "r") as f:
93
- config = yaml.load(f, Loader=yaml.FullLoader)
94
-
95
- if config is None:
96
- return
97
- if mapping is None and "column_mapping" in config.keys():
98
- del config["column_mapping"]
99
- else:
100
- config["column_mapping"] = mapping
101
- with open(get_yaml_path(uid), "w") as f:
102
- # yaml Dumper will by default sort the keys
103
- yaml.dump(config, f, Dumper=Dumper, sort_keys=False)
104
-
105
-
106
- # convert column mapping dataframe to json
107
- def convert_column_mapping_to_json(df, label=""):
108
- column_mapping = {}
109
- column_mapping[label] = []
110
- for _, row in df.iterrows():
111
- column_mapping[label].append(row.tolist())
112
- return column_mapping
113
-
114
-
115
- def get_log_file_with_uid(uid):
116
- try:
117
- print(f"Loading {uid}.log")
118
- with open(f"./tmp/{uid}.log", "a") as file:
119
- return file.read()
120
- except Exception:
121
- return "Log file does not exist"
122
-
123
-
124
- def get_logs_file():
125
- try:
126
- with open(LOG_FILE, "r") as file:
127
- return file.read()
128
- except Exception:
129
- return "Log file does not exist"
130
-
131
-
132
- def write_log_to_user_file(task_id, log):
133
- with open(f"./tmp/{task_id}.log", "a") as f:
134
- f.write(log)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
isolated_env.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import subprocess
3
 
4
- from io_utils import write_log_to_user_file
5
 
6
 
7
  def prepare_venv(execution_id, deps):
 
1
  import os
2
  import subprocess
3
 
4
+ from utils.io_utils import write_log_to_user_file
5
 
6
 
7
  def prepare_venv(execution_id, deps):
leaderboard.py DELETED
@@ -1,5 +0,0 @@
1
- import pandas as pd
2
-
3
- records = pd.DataFrame()
4
-
5
- LEADERBOARD = "giskard-bot/evaluator-leaderboard"
 
 
 
 
 
 
pipe.py DELETED
@@ -1,3 +0,0 @@
1
-
2
- jobs = list()
3
- current = None
 
 
 
 
requirements.txt CHANGED
@@ -4,6 +4,4 @@ hf-transfer
4
  torch==2.0.1
5
  transformers
6
  datasets
7
- tabulate
8
- gradio[oauth]==4.19.2
9
  -e git+https://github.com/Giskard-AI/cicd.git#egg=giskard-cicd
 
4
  torch==2.0.1
5
  transformers
6
  datasets
 
 
7
  -e git+https://github.com/Giskard-AI/cicd.git#egg=giskard-cicd
run_jobs.py DELETED
@@ -1,181 +0,0 @@
1
- import json
2
- import logging
3
- import os
4
- import subprocess
5
- import threading
6
- import time
7
- from pathlib import Path
8
-
9
- import pipe
10
- from app_env import (
11
- HF_GSK_HUB_HF_TOKEN,
12
- HF_GSK_HUB_KEY,
13
- HF_GSK_HUB_PROJECT_KEY,
14
- HF_GSK_HUB_UNLOCK_TOKEN,
15
- HF_GSK_HUB_URL,
16
- HF_REPO_ID,
17
- HF_SPACE_ID,
18
- HF_WRITE_TOKEN,
19
- )
20
- from io_utils import LOG_FILE, get_submitted_yaml_path, write_log_to_user_file
21
- from isolated_env import prepare_venv
22
- from leaderboard import LEADERBOARD
23
-
24
- is_running = False
25
-
26
- logger = logging.getLogger(__file__)
27
-
28
-
29
- def start_process_run_job():
30
- try:
31
- logging.debug("Running jobs in thread")
32
- global thread, is_running
33
- thread = threading.Thread(target=run_job)
34
- thread.daemon = True
35
- is_running = True
36
- thread.start()
37
-
38
- except Exception as e:
39
- print("Failed to start thread: ", e)
40
-
41
-
42
- def stop_thread():
43
- logging.debug("Stop thread")
44
- global is_running
45
- is_running = False
46
-
47
-
48
- def prepare_env_and_get_command(
49
- m_id,
50
- d_id,
51
- config,
52
- split,
53
- inference_token,
54
- uid,
55
- label_mapping,
56
- feature_mapping,
57
- ):
58
- leaderboard_dataset = None
59
- if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
60
- leaderboard_dataset = LEADERBOARD
61
-
62
- executable = "giskard_scanner"
63
- try:
64
- # Copy the current requirements (might be changed)
65
- with open("requirements.txt", "r") as f:
66
- executable = prepare_venv(
67
- uid,
68
- "\n".join(f.readlines()),
69
- )
70
- logger.info(f"Using {executable} as executable")
71
- except Exception as e:
72
- logger.warn(f"Create env failed due to {e}, using the current env as fallback.")
73
- executable = "giskard_scanner"
74
-
75
- command = [
76
- executable,
77
- "--loader",
78
- "huggingface",
79
- "--model",
80
- m_id,
81
- "--dataset",
82
- d_id,
83
- "--dataset_config",
84
- config,
85
- "--dataset_split",
86
- split,
87
- "--output_format",
88
- "markdown",
89
- "--output_portal",
90
- "huggingface",
91
- "--feature_mapping",
92
- json.dumps(feature_mapping),
93
- "--label_mapping",
94
- json.dumps(label_mapping),
95
- "--scan_config",
96
- get_submitted_yaml_path(uid),
97
- "--inference_type",
98
- "hf_inference_api",
99
- "--inference_api_token",
100
- inference_token,
101
- ]
102
- # The token to publish post
103
- if os.environ.get(HF_WRITE_TOKEN):
104
- command.append("--hf_token")
105
- command.append(os.environ.get(HF_WRITE_TOKEN))
106
-
107
- # The repo to publish post
108
- if os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID):
109
- command.append("--discussion_repo")
110
- # TODO: Replace by the model id
111
- command.append(os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID))
112
-
113
- # The repo to publish for ranking
114
- if leaderboard_dataset:
115
- command.append("--leaderboard_dataset")
116
- command.append(leaderboard_dataset)
117
-
118
- # The info to upload to Giskard hub
119
- if os.environ.get(HF_GSK_HUB_KEY):
120
- command.append("--giskard_hub_api_key")
121
- command.append(os.environ.get(HF_GSK_HUB_KEY))
122
- if os.environ.get(HF_GSK_HUB_URL):
123
- command.append("--giskard_hub_url")
124
- command.append(os.environ.get(HF_GSK_HUB_URL))
125
- if os.environ.get(HF_GSK_HUB_PROJECT_KEY):
126
- command.append("--giskard_hub_project_key")
127
- command.append(os.environ.get(HF_GSK_HUB_PROJECT_KEY))
128
- if os.environ.get(HF_GSK_HUB_HF_TOKEN):
129
- command.append("--giskard_hub_hf_token")
130
- command.append(os.environ.get(HF_GSK_HUB_HF_TOKEN))
131
- if os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN):
132
- command.append("--giskard_hub_unlock_token")
133
- command.append(os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN))
134
-
135
- eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
136
-
137
- write_log_to_user_file(
138
- uid,
139
- f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
140
- )
141
-
142
- return command
143
-
144
-
145
- def save_job_to_pipe(task_id, job, description, lock):
146
- with lock:
147
- pipe.jobs.append((task_id, job, description))
148
-
149
-
150
- def pop_job_from_pipe():
151
- if len(pipe.jobs) == 0:
152
- return
153
- job_info = pipe.jobs.pop()
154
- pipe.current = job_info[2]
155
- task_id = job_info[0]
156
-
157
- # Link to LOG_FILE
158
- log_file_path = Path(LOG_FILE)
159
- if log_file_path.exists():
160
- log_file_path.unlink()
161
- os.symlink(f"./tmp/{task_id}.log", LOG_FILE)
162
-
163
- write_log_to_user_file(task_id, f"Running job id {task_id}\n")
164
- command = prepare_env_and_get_command(*job_info[1])
165
-
166
- with open(f"./tmp/{task_id}.log", "a") as log_file:
167
- p = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
168
- p.wait()
169
- pipe.current = None
170
-
171
-
172
- def run_job():
173
- global is_running
174
- while is_running:
175
- try:
176
- pop_job_from_pipe()
177
- time.sleep(10)
178
- except KeyboardInterrupt:
179
- logging.debug("KeyboardInterrupt stop background thread")
180
- is_running = False
181
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
text_classification.py DELETED
@@ -1,409 +0,0 @@
1
- import json
2
- import logging
3
-
4
- import datasets
5
- import huggingface_hub
6
- import pandas as pd
7
- from transformers import pipeline
8
- import requests
9
- import os
10
- from app_env import HF_WRITE_TOKEN
11
-
12
- logger = logging.getLogger(__name__)
13
- AUTH_CHECK_URL = "https://huggingface.co/api/whoami-v2"
14
-
15
- logger = logging.getLogger(__file__)
16
-
17
- class HuggingFaceInferenceAPIResponse:
18
- def __init__(self, message):
19
- self.message = message
20
-
21
-
22
- def get_labels_and_features_from_dataset(ds):
23
- try:
24
- dataset_features = ds.features
25
- label_keys = [i for i in dataset_features.keys() if i.startswith("label")]
26
- features = [f for f in dataset_features.keys() if not f.startswith("label")]
27
-
28
- if len(label_keys) == 0: # no labels found
29
- # return everything for post processing
30
- return list(dataset_features.keys()), list(dataset_features.keys()), None
31
-
32
- labels = None
33
- if not isinstance(dataset_features[label_keys[0]], datasets.ClassLabel):
34
- if hasattr(dataset_features[label_keys[0]], "feature"):
35
- label_feat = dataset_features[label_keys[0]].feature
36
- labels = label_feat.names
37
- else:
38
- labels = ds.unique(label_keys[0])
39
- else:
40
- labels = dataset_features[label_keys[0]].names
41
- return labels, features, label_keys
42
- except Exception as e:
43
- logging.warning(
44
- f"Get Labels/Features Failed for dataset: {e}"
45
- )
46
- return None, None, None
47
-
48
- def check_model_task(model_id):
49
- # check if model is valid on huggingface
50
- try:
51
- task = huggingface_hub.model_info(model_id).pipeline_tag
52
- if task is None:
53
- return None
54
- return task
55
- except Exception:
56
- return None
57
-
58
- def get_model_labels(model_id, example_input):
59
- hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
60
- payload = {"inputs": example_input, "options": {"use_cache": True}}
61
- response = hf_inference_api(model_id, hf_token, payload)
62
- if "error" in response:
63
- return None
64
- return extract_from_response(response, "label")
65
-
66
- def extract_from_response(data, key):
67
- results = []
68
-
69
- if isinstance(data, dict):
70
- res = data.get(key)
71
- if res is not None:
72
- results.append(res)
73
-
74
- for value in data.values():
75
- results.extend(extract_from_response(value, key))
76
-
77
- elif isinstance(data, list):
78
- for element in data:
79
- results.extend(extract_from_response(element, key))
80
-
81
- return results
82
-
83
- def hf_inference_api(model_id, hf_token, payload):
84
- hf_inference_api_endpoint = os.environ.get(
85
- "HF_INFERENCE_ENDPOINT", default="https://api-inference.huggingface.co"
86
- )
87
- url = f"{hf_inference_api_endpoint}/models/{model_id}"
88
- headers = {"Authorization": f"Bearer {hf_token}"}
89
- response = requests.post(url, headers=headers, json=payload)
90
-
91
- if not hasattr(response, "status_code") or response.status_code != 200:
92
- logger.warning(f"Request to inference API returns {response}")
93
-
94
- try:
95
- output = response.json()
96
- if "error" in output and "Input is too long" in output["error"]:
97
- payload.update({"parameters": {"truncation": True, "max_length": 512}})
98
- response = requests.post(url, headers=headers, json=payload)
99
- if not hasattr(response, "status_code") or response.status_code != 200:
100
- logger.warning(f"Request to inference API returns {response}")
101
- return response.json()
102
- except Exception:
103
- return {"error": response.content}
104
-
105
- def preload_hf_inference_api(model_id):
106
- payload = {"inputs": "This is a test", "options": {"use_cache": True, }}
107
- hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
108
- hf_inference_api(model_id, hf_token, payload)
109
-
110
- def check_model_pipeline(model_id):
111
- try:
112
- task = huggingface_hub.model_info(model_id).pipeline_tag
113
- except Exception:
114
- return None
115
-
116
- try:
117
- ppl = pipeline(task=task, model=model_id)
118
-
119
- return ppl
120
- except Exception:
121
- return None
122
-
123
-
124
- def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
125
- for model_label in id2label_mapping.keys():
126
- if model_label.upper() == label.upper():
127
- return model_label, label
128
- return None, label
129
-
130
-
131
- def text_classification_map_model_and_dataset_labels(id2label, dataset_features):
132
- id2label_mapping = {id2label[k]: None for k in id2label.keys()}
133
- dataset_labels = None
134
- for feature in dataset_features.values():
135
- if not isinstance(feature, datasets.ClassLabel):
136
- continue
137
- if len(feature.names) != len(id2label_mapping.keys()):
138
- continue
139
-
140
- dataset_labels = feature.names
141
- # Try to match labels
142
- for label in feature.names:
143
- if label in id2label_mapping.keys():
144
- model_label = label
145
- else:
146
- # Try to find case unsensative
147
- model_label, label = text_classificaiton_match_label_case_unsensative(
148
- id2label_mapping, label
149
- )
150
- if model_label is not None:
151
- id2label_mapping[model_label] = label
152
- else:
153
- print(f"Label {label} is not found in model labels")
154
-
155
- return id2label_mapping, dataset_labels
156
-
157
-
158
- """
159
- params:
160
- column_mapping: dict
161
- example: {
162
- "text": "sentences",
163
- "label": {
164
- "label0": "LABEL_0",
165
- "label1": "LABEL_1"
166
- }
167
- }
168
- ppl: pipeline
169
- """
170
-
171
-
172
- def check_column_mapping_keys_validity(column_mapping, ppl):
173
- # get the element in all the list elements
174
- column_mapping = json.loads(column_mapping)
175
- if "data" not in column_mapping.keys():
176
- return True
177
- user_labels = set([pair[0] for pair in column_mapping["data"]])
178
- model_labels = set([pair[1] for pair in column_mapping["data"]])
179
-
180
- id2label = ppl.model.config.id2label
181
- original_labels = set(id2label.values())
182
-
183
- return user_labels == model_labels == original_labels
184
-
185
-
186
- """
187
- params:
188
- column_mapping: dict
189
- dataset_features: dict
190
- example: {
191
- 'text': Value(dtype='string', id=None),
192
- 'label': ClassLabel(names=['negative', 'neutral', 'positive'], id=None)
193
- }
194
- """
195
-
196
-
197
- def infer_text_input_column(column_mapping, dataset_features):
198
- # Check whether we need to infer the text input column
199
- infer_text_input_column = True
200
- feature_map_df = None
201
-
202
- if "text" in column_mapping.keys():
203
- dataset_text_column = column_mapping["text"]
204
- if dataset_text_column in dataset_features.keys():
205
- infer_text_input_column = False
206
- else:
207
- logging.warning(f"Provided {dataset_text_column} is not in Dataset columns")
208
-
209
- if infer_text_input_column:
210
- # Try to retrieve one
211
- candidates = [
212
- f for f in dataset_features if dataset_features[f].dtype == "string"
213
- ]
214
- feature_map_df = pd.DataFrame(
215
- {"Dataset Features": [candidates[0]], "Model Input Features": ["text"]}
216
- )
217
- if len(candidates) > 0:
218
- logging.debug(f"Candidates are {candidates}")
219
- column_mapping["text"] = candidates[0]
220
-
221
- return column_mapping, feature_map_df
222
-
223
-
224
- """
225
- params:
226
- column_mapping: dict
227
- id2label_mapping: dict
228
- example:
229
- id2label_mapping: {
230
- 'negative': 'negative',
231
- 'neutral': 'neutral',
232
- 'positive': 'positive'
233
- }
234
- """
235
-
236
-
237
- def infer_output_label_column(
238
- column_mapping, id2label_mapping, id2label, dataset_labels
239
- ):
240
- # Check whether we need to infer the output label column
241
- if "data" in column_mapping.keys():
242
- if isinstance(column_mapping["data"], list):
243
- # Use the column mapping passed by user
244
- for user_label, model_label in column_mapping["data"]:
245
- id2label_mapping[model_label] = user_label
246
- elif None in id2label_mapping.values():
247
- column_mapping["label"] = {i: None for i in id2label.keys()}
248
- return column_mapping, None
249
-
250
- if "data" not in column_mapping.keys():
251
- # Column mapping should contain original model labels
252
- column_mapping["label"] = {
253
- str(i): id2label_mapping[label]
254
- for i, label in zip(id2label.keys(), dataset_labels)
255
- }
256
-
257
- id2label_df = pd.DataFrame(
258
- {
259
- "Dataset Labels": dataset_labels,
260
- "Model Prediction Labels": [
261
- id2label_mapping[label] for label in dataset_labels
262
- ],
263
- }
264
- )
265
-
266
- return column_mapping, id2label_df
267
-
268
-
269
- def check_dataset_features_validity(d_id, config, split):
270
- # We assume dataset is ok here
271
- ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
272
- try:
273
- dataset_features = ds.features
274
- except AttributeError:
275
- # Dataset does not have features, need to provide everything
276
- return None, None
277
- # Load dataset as DataFrame
278
- df = ds.to_pandas()
279
-
280
- return df, dataset_features
281
-
282
- def select_the_first_string_column(ds):
283
- for feature in ds.features.keys():
284
- if isinstance(ds[0][feature], str):
285
- return feature
286
- return None
287
-
288
-
289
- def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split, hf_token):
290
- # get a sample prediction from the model on the dataset
291
- prediction_input = None
292
- prediction_result = None
293
- try:
294
- # Use the first item to test prediction
295
- ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
296
- if "text" not in ds.features.keys():
297
- # Dataset does not have text column
298
- prediction_input = ds[0][select_the_first_string_column(ds)]
299
- else:
300
- prediction_input = ds[0]["text"]
301
-
302
- payload = {"inputs": prediction_input, "options": {"use_cache": True}}
303
- results = hf_inference_api(model_id, hf_token, payload)
304
-
305
- if isinstance(results, dict) and "error" in results.keys():
306
- if "estimated_time" in results.keys():
307
- return prediction_input, HuggingFaceInferenceAPIResponse(
308
- f"Estimated time: {int(results['estimated_time'])}s. Please try again later.")
309
- return prediction_input, HuggingFaceInferenceAPIResponse(
310
- f"Inference Error: {results['error']}.")
311
-
312
- while isinstance(results, list):
313
- if isinstance(results[0], dict):
314
- break
315
- results = results[0]
316
- prediction_result = {
317
- f'{result["label"]}': result["score"] for result in results
318
- }
319
- except Exception as e:
320
- # inference api prediction failed, show the error message
321
- logger.error(f"Get example prediction failed {e}")
322
- return prediction_input, None
323
-
324
- return prediction_input, prediction_result
325
-
326
-
327
- def get_sample_prediction(ppl, df, column_mapping, id2label_mapping):
328
- # get a sample prediction from the model on the dataset
329
- prediction_input = None
330
- prediction_result = None
331
- try:
332
- # Use the first item to test prediction
333
- prediction_input = df.head(1).at[0, column_mapping["text"]]
334
- results = ppl({"text": prediction_input}, top_k=None)
335
- prediction_result = {
336
- f'{result["label"]}': result["score"] for result in results
337
- }
338
- except Exception:
339
- # Pipeline prediction failed, need to provide labels
340
- return prediction_input, None
341
-
342
- # Display results in original label and mapped label
343
- prediction_result = {
344
- f'{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result[
345
- "score"
346
- ]
347
- for result in results
348
- }
349
- return prediction_input, prediction_result
350
-
351
-
352
- def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
353
- # load dataset as pd DataFrame
354
- # get features column from dataset
355
- df, dataset_features = check_dataset_features_validity(d_id, config, split)
356
-
357
- column_mapping, feature_map_df = infer_text_input_column(
358
- column_mapping, dataset_features
359
- )
360
- if feature_map_df is None:
361
- # dataset does not have any features
362
- return None, None, None, None, None
363
-
364
- # Retrieve all labels
365
- id2label = ppl.model.config.id2label
366
-
367
- # Infer labels
368
- id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(
369
- id2label, dataset_features
370
- )
371
- column_mapping, id2label_df = infer_output_label_column(
372
- column_mapping, id2label_mapping, id2label, dataset_labels
373
- )
374
- if id2label_df is None:
375
- # does not able to infer output label column
376
- return column_mapping, None, None, None, feature_map_df
377
-
378
- # Get a sample prediction
379
- prediction_input, prediction_result = get_sample_prediction(
380
- ppl, df, column_mapping, id2label_mapping
381
- )
382
- if prediction_result is None:
383
- # does not able to get a sample prediction
384
- return column_mapping, prediction_input, None, id2label_df, feature_map_df
385
-
386
- return (
387
- column_mapping,
388
- prediction_input,
389
- prediction_result,
390
- id2label_df,
391
- feature_map_df,
392
- )
393
-
394
- def strip_model_id_from_url(model_id):
395
- if model_id.startswith("https://huggingface.co/"):
396
- return "/".join(model_id.split("/")[-2:])
397
- return model_id
398
-
399
- def check_hf_token_validity(hf_token):
400
- if hf_token == "":
401
- return False
402
- if not isinstance(hf_token, str):
403
- return False
404
- # use huggingface api to check the token
405
- headers = {"Authorization": f"Bearer {hf_token}"}
406
- response = requests.get(AUTH_CHECK_URL, headers=headers)
407
- if response.status_code != 200:
408
- return False
409
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
text_classification_ui_helpers.py DELETED
@@ -1,458 +0,0 @@
1
- import collections
2
- import logging
3
- import threading
4
- import uuid
5
-
6
- import datasets
7
- import gradio as gr
8
- import pandas as pd
9
-
10
- import leaderboard
11
- from io_utils import (
12
- read_column_mapping,
13
- write_column_mapping,
14
- read_scanners,
15
- write_scanners,
16
- )
17
- from run_jobs import save_job_to_pipe
18
- from text_classification import (
19
- strip_model_id_from_url,
20
- check_model_task,
21
- preload_hf_inference_api,
22
- get_example_prediction,
23
- get_labels_and_features_from_dataset,
24
- check_hf_token_validity,
25
- HuggingFaceInferenceAPIResponse,
26
- )
27
- from wordings import (
28
- CHECK_CONFIG_OR_SPLIT_RAW,
29
- CONFIRM_MAPPING_DETAILS_FAIL_RAW,
30
- MAPPING_STYLED_ERROR_WARNING,
31
- NOT_TEXT_CLASSIFICATION_MODEL_RAW,
32
- UNMATCHED_MODEL_DATASET_STYLED_ERROR,
33
- CHECK_LOG_SECTION_RAW,
34
- VALIDATED_MODEL_DATASET_STYLED,
35
- get_dataset_fetch_error_raw,
36
- )
37
- import os
38
- from app_env import HF_WRITE_TOKEN
39
-
40
- MAX_LABELS = 40
41
- MAX_FEATURES = 20
42
-
43
- ds_dict = None
44
- ds_config = None
45
-
46
- def get_related_datasets_from_leaderboard(model_id):
47
- records = leaderboard.records
48
- model_id = strip_model_id_from_url(model_id)
49
- model_records = records[records["model_id"] == model_id]
50
- datasets_unique = list(model_records["dataset_id"].unique())
51
-
52
- if len(datasets_unique) == 0:
53
- return gr.update(choices=[])
54
-
55
- return gr.update(choices=datasets_unique)
56
-
57
-
58
- logger = logging.getLogger(__file__)
59
-
60
- def get_dataset_splits(dataset_id, dataset_config):
61
- try:
62
- splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
63
- return gr.update(choices=splits, value=splits[0], visible=True)
64
- except Exception as e:
65
- logger.warn(f"Check your dataset {dataset_id} and config {dataset_config}: {e}")
66
- return gr.update(visible=False)
67
-
68
- def check_dataset(dataset_id):
69
- logger.info(f"Loading {dataset_id}")
70
- try:
71
- configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
72
- if len(configs) == 0:
73
- return (
74
- gr.update(visible=False),
75
- gr.update(visible=False),
76
- ""
77
- )
78
- splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
79
- return (
80
- gr.update(choices=configs, value=configs[0], visible=True),
81
- gr.update(choices=splits, value=splits[0], visible=True),
82
- ""
83
- )
84
- except Exception as e:
85
- logger.warn(f"Check your dataset {dataset_id}: {e}")
86
- if "doesn't exist" in str(e):
87
- gr.Warning(get_dataset_fetch_error_raw(e))
88
- if "forbidden" in str(e).lower(): # GSK-2770
89
- gr.Warning(get_dataset_fetch_error_raw(e))
90
- return (
91
- gr.update(visible=False),
92
- gr.update(visible=False),
93
- ""
94
- )
95
-
96
- def empty_column_mapping(uid):
97
- write_column_mapping(None, uid)
98
-
99
- def write_column_mapping_to_config(uid, *labels):
100
- # TODO: Substitute 'text' with more features for zero-shot
101
- # we are not using ds features because we only support "text" for now
102
- all_mappings = read_column_mapping(uid)
103
-
104
- if labels is None:
105
- return
106
- all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
107
- all_mappings = export_mappings(
108
- all_mappings,
109
- "features",
110
- ["text"],
111
- labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)],
112
- )
113
-
114
- write_column_mapping(all_mappings, uid)
115
-
116
- def export_mappings(all_mappings, key, subkeys, values):
117
- if key not in all_mappings.keys():
118
- all_mappings[key] = dict()
119
- if subkeys is None:
120
- subkeys = list(all_mappings[key].keys())
121
-
122
- if not subkeys:
123
- logging.debug(f"subkeys is empty for {key}")
124
- return all_mappings
125
-
126
- for i, subkey in enumerate(subkeys):
127
- if subkey:
128
- all_mappings[key][subkey] = values[i % len(values)]
129
- return all_mappings
130
-
131
-
132
- def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
133
- all_mappings = read_column_mapping(uid)
134
- # For flattened raw datasets with no labels
135
- # check if there are shared labels between model and dataset
136
- shared_labels = set(model_labels).intersection(set(ds_labels))
137
- if shared_labels:
138
- ds_labels = list(shared_labels)
139
- if len(ds_labels) > MAX_LABELS:
140
- ds_labels = ds_labels[:MAX_LABELS]
141
- gr.Warning(f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd.")
142
-
143
- # sort labels to make sure the order is consistent
144
- # prediction gives the order based on probability
145
- ds_labels.sort()
146
- model_labels.sort()
147
-
148
- lables = [
149
- gr.Dropdown(
150
- label=f"{label}",
151
- choices=model_labels,
152
- value=model_labels[i % len(model_labels)],
153
- interactive=True,
154
- visible=True,
155
- )
156
- for i, label in enumerate(ds_labels)
157
- ]
158
- lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
159
- all_mappings = export_mappings(all_mappings, "labels", ds_labels, model_labels)
160
-
161
- # TODO: Substitute 'text' with more features for zero-shot
162
- features = [
163
- gr.Dropdown(
164
- label=f"{feature}",
165
- choices=ds_features,
166
- value=ds_features[0],
167
- interactive=True,
168
- visible=True,
169
- )
170
- for feature in ["text"]
171
- ]
172
- features += [
173
- gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))
174
- ]
175
- all_mappings = export_mappings(all_mappings, "features", ["text"], ds_features)
176
- write_column_mapping(all_mappings, uid)
177
-
178
- return lables + features
179
-
180
-
181
- def precheck_model_ds_enable_example_btn(
182
- model_id, dataset_id, dataset_config, dataset_split
183
- ):
184
- model_id = strip_model_id_from_url(model_id)
185
- model_task = check_model_task(model_id)
186
- preload_hf_inference_api(model_id)
187
-
188
- if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
189
- return (
190
- gr.update(interactive=False),
191
- gr.update(visible=False),
192
- gr.update(visible=False),
193
- gr.update(visible=False),
194
- gr.update(visible=False),
195
- gr.update(visible=False),
196
- )
197
-
198
- try:
199
- ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
200
- df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
201
- ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds[dataset_split])
202
-
203
- if model_task is None or model_task != "text-classification":
204
- gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
205
- return (
206
- gr.update(interactive=False),
207
- gr.update(value=df, visible=True),
208
- gr.update(visible=False),
209
- gr.update(visible=False),
210
- gr.update(visible=False),
211
- gr.update(visible=False),
212
- )
213
-
214
- if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
215
- gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
216
- return (
217
- gr.update(interactive=False),
218
- gr.update(value=df, visible=True),
219
- gr.update(visible=False),
220
- gr.update(visible=False),
221
- gr.update(visible=False),
222
- gr.update(visible=False),
223
- )
224
-
225
- return (
226
- gr.update(interactive=True),
227
- gr.update(value=df, visible=True),
228
- gr.update(visible=False),
229
- gr.update(visible=False),
230
- gr.update(visible=False),
231
- gr.update(visible=False),
232
- )
233
- except Exception as e:
234
- # Config or split wrong
235
- logger.warn(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
236
- return (
237
- gr.update(interactive=False),
238
- gr.update(visible=False),
239
- gr.update(visible=False),
240
- gr.update(visible=False),
241
- gr.update(visible=False),
242
- gr.update(visible=False),
243
- )
244
-
245
-
246
- def align_columns_and_show_prediction(
247
- model_id,
248
- dataset_id,
249
- dataset_config,
250
- dataset_split,
251
- uid,
252
- profile: gr.OAuthProfile | None,
253
- oauth_token: gr.OAuthToken | None,
254
- ):
255
- model_id = strip_model_id_from_url(model_id)
256
- model_task = check_model_task(model_id)
257
- if model_task is None or model_task != "text-classification":
258
- gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
259
- return (
260
- gr.update(visible=False),
261
- gr.update(visible=False),
262
- gr.update(visible=False, open=False),
263
- gr.update(interactive=False),
264
- "",
265
- *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)],
266
- )
267
-
268
- dropdown_placement = [
269
- gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
270
- ]
271
-
272
- hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
273
-
274
- prediction_input, prediction_response = get_example_prediction(
275
- model_id, dataset_id, dataset_config, dataset_split, hf_token
276
- )
277
-
278
- if prediction_input is None or prediction_response is None:
279
- return (
280
- gr.update(visible=False),
281
- gr.update(visible=False),
282
- gr.update(visible=False),
283
- gr.update(visible=False, open=False),
284
- gr.update(interactive=False),
285
- "",
286
- *dropdown_placement,
287
- )
288
-
289
- if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
290
- return (
291
- gr.update(visible=False),
292
- gr.update(visible=False),
293
- gr.update(visible=False),
294
- gr.update(visible=False, open=False),
295
- gr.update(interactive=False),
296
- f"Hugging Face Inference API is loading your model. {prediction_response.message}",
297
- *dropdown_placement,
298
- )
299
-
300
- model_labels = list(prediction_response.keys())
301
-
302
- ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
303
- ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)
304
-
305
- # when dataset does not have labels or features
306
- if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
307
- gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
308
- return (
309
- gr.update(visible=False),
310
- gr.update(visible=False),
311
- gr.update(visible=False),
312
- gr.update(visible=False, open=False),
313
- gr.update(interactive=False),
314
- "",
315
- *dropdown_placement,
316
- )
317
-
318
- if len(ds_labels) != len(model_labels):
319
- return (
320
- gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
321
- gr.update(visible=False),
322
- gr.update(visible=False),
323
- gr.update(visible=False, open=False),
324
- gr.update(interactive=False),
325
- "",
326
- *dropdown_placement,
327
- )
328
-
329
- column_mappings = list_labels_and_features_from_dataset(
330
- ds_labels,
331
- ds_features,
332
- model_labels,
333
- uid,
334
- )
335
-
336
- # when labels or features are not aligned
337
- # show manually column mapping
338
- if (
339
- collections.Counter(model_labels) != collections.Counter(ds_labels)
340
- or ds_features[0] != "text"
341
- ):
342
- return (
343
- gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
344
- gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
345
- gr.update(value=prediction_response, visible=True),
346
- gr.update(visible=True, open=True),
347
- gr.update(interactive=(profile is not None and oauth_token is not None)),
348
- "",
349
- *column_mappings,
350
- )
351
-
352
- return (
353
- gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
354
- gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
355
- gr.update(value=prediction_response, visible=True),
356
- gr.update(visible=True, open=False),
357
- gr.update(interactive=(profile is not None and oauth_token is not None)),
358
- "",
359
- *column_mappings,
360
- )
361
-
362
-
363
- def check_column_mapping_keys_validity(all_mappings):
364
- if all_mappings is None:
365
- logger.warning("all_mapping is None")
366
- gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
367
- return False
368
-
369
- if "labels" not in all_mappings.keys():
370
- logger.warning(f"Label mapping is not valid, all_mappings: {all_mappings}")
371
- return False
372
-
373
- return True
374
-
375
- def enable_run_btn(uid, model_id, dataset_id, dataset_config, dataset_split, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
376
- if profile is None:
377
- return gr.update(interactive=False)
378
- if oath_token is None:
379
- return gr.update(interactive=False)
380
- if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
381
- logger.warn("Model id or dataset id is not selected")
382
- return gr.update(interactive=False)
383
-
384
- all_mappings = read_column_mapping(uid)
385
- if not check_column_mapping_keys_validity(all_mappings):
386
- logger.warn("Column mapping is not valid")
387
- return gr.update(interactive=False)
388
-
389
- def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys=None):
390
- label_mapping = {}
391
- if len(all_mappings["labels"].keys()) != len(ds_labels):
392
- logger.warn(f"""Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
393
- \nall_mappings: {all_mappings}\nds_labels: {ds_labels}""")
394
-
395
- if len(all_mappings["features"].keys()) != len(ds_features):
396
- logger.warn(f"""Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
397
- \nall_mappings: {all_mappings}\nds_features: {ds_features}""")
398
-
399
- for i, label in zip(range(len(ds_labels)), ds_labels):
400
- # align the saved labels with dataset labels order
401
- label_mapping.update({str(i): all_mappings["labels"][label]})
402
-
403
- if "features" not in all_mappings.keys():
404
- logger.warning("features not in all_mappings")
405
- gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
406
-
407
- feature_mapping = all_mappings["features"]
408
- if len(label_keys) > 0:
409
- feature_mapping.update({"label": label_keys[0]})
410
- return label_mapping, feature_mapping
411
-
412
- def show_hf_token_info(token):
413
- valid = check_hf_token_validity(token)
414
- if not valid:
415
- return gr.update(visible=True)
416
- return gr.update(visible=False)
417
-
418
- def try_submit(m_id, d_id, config, split, uid, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
419
- all_mappings = read_column_mapping(uid)
420
- if not check_column_mapping_keys_validity(all_mappings):
421
- return (gr.update(interactive=True), gr.update(visible=False))
422
-
423
- # get ds labels and features again for alignment
424
- ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
425
- ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
426
- label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys)
427
-
428
- eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
429
- save_job_to_pipe(
430
- uid,
431
- (
432
- m_id,
433
- d_id,
434
- config,
435
- split,
436
- oath_token.token,
437
- uid,
438
- label_mapping,
439
- feature_mapping,
440
- ),
441
- eval_str,
442
- threading.Lock(),
443
- )
444
- gr.Info("Your evaluation has been submitted")
445
-
446
- new_uid = uuid.uuid4()
447
- scanners = read_scanners(uid)
448
- write_scanners(scanners, new_uid)
449
-
450
- return (
451
- gr.update(interactive=False), # Submit button
452
- gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
453
- new_uid, # Allocate a new uuid
454
- gr.update(visible=False),
455
- gr.update(visible=False),
456
- gr.update(visible=False),
457
- gr.update(visible=False),
458
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py DELETED
@@ -1,29 +0,0 @@
1
- import sys
2
-
3
- import yaml
4
-
5
-
6
- # read scanners from yaml file
7
- # return a list of scanners
8
- def read_scanners(path):
9
- scanners = []
10
- with open(path, "r") as f:
11
- config = yaml.load(f, Loader=yaml.FullLoader)
12
- scanners = config.get("detectors", None)
13
- return scanners
14
-
15
-
16
- # convert a list of scanners to yaml file
17
- def write_scanners(scanners):
18
- with open("./scan_config.yaml", "w") as f:
19
- # save scanners to detectors in yaml
20
- yaml.dump({"detectors": scanners}, f)
21
-
22
-
23
- # convert column mapping dataframe to json
24
- def convert_column_mapping_to_json(df, label=""):
25
- column_mapping = {}
26
- column_mapping[label] = []
27
- for _, row in df.iterrows():
28
- column_mapping[label].append(row.tolist())
29
- return column_mapping
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/io_utils.py CHANGED
@@ -1,25 +1,15 @@
1
  import os
2
- import logging
3
  import yaml
4
 
5
- YAML_PATH = "../cicd/configs"
6
- LOG_FILE = "../temp_log"
7
 
8
- logger = logging.getLogger(__name__)
9
 
10
  class Dumper(yaml.Dumper):
11
  def increase_indent(self, flow=False, *args, **kwargs):
12
  return super().increase_indent(flow=flow, indentless=False)
13
 
14
- def get_submitted_yaml_path(uid):
15
- if not os.path.exists(f"{YAML_PATH}/submitted"):
16
- os.makedirs(f"{YAML_PATH}/submitted")
17
- if not os.path.exists(f"{YAML_PATH}/{uid}_config.yaml"):
18
- logger.error(f"config.yaml does not exist for {uid}")
19
- os.system(f"cp config.yaml {YAML_PATH}/{uid}_config.yaml")
20
- if not os.path.exists(f"{YAML_PATH}/submitted/{uid}_config.yaml"):
21
- os.system(f"cp {YAML_PATH}/{uid}_config.yaml {YAML_PATH}/submitted/{uid}_config.yaml")
22
- return f"{YAML_PATH}/submitted/{uid}_config.yaml"
23
 
24
  def get_yaml_path(uid):
25
  if not os.path.exists(YAML_PATH):
@@ -82,8 +72,6 @@ def read_column_mapping(uid):
82
  config = yaml.load(f, Loader=yaml.FullLoader)
83
  if config:
84
  column_mapping = config.get("column_mapping", dict())
85
- if column_mapping is None:
86
- column_mapping = {}
87
  return column_mapping
88
 
89
 
 
1
  import os
2
+
3
  import yaml
4
 
5
+ YAML_PATH = "./cicd/configs"
6
+ LOG_FILE = "temp_log"
7
 
 
8
 
9
  class Dumper(yaml.Dumper):
10
  def increase_indent(self, flow=False, *args, **kwargs):
11
  return super().increase_indent(flow=flow, indentless=False)
12
 
 
 
 
 
 
 
 
 
 
13
 
14
  def get_yaml_path(uid):
15
  if not os.path.exists(YAML_PATH):
 
72
  config = yaml.load(f, Loader=yaml.FullLoader)
73
  if config:
74
  column_mapping = config.get("column_mapping", dict())
 
 
75
  return column_mapping
76
 
77
 
utils/run_jobs.py CHANGED
@@ -17,7 +17,7 @@ from app_env import (
17
  HF_SPACE_ID,
18
  HF_WRITE_TOKEN,
19
  )
20
- from io_utils import LOG_FILE, get_submitted_yaml_path, write_log_to_user_file
21
  from isolated_env import prepare_venv
22
  from utils.leaderboard import LEADERBOARD
23
 
@@ -50,6 +50,7 @@ def prepare_env_and_get_command(
50
  d_id,
51
  config,
52
  split,
 
53
  inference_token,
54
  uid,
55
  label_mapping,
@@ -59,6 +60,10 @@ def prepare_env_and_get_command(
59
  if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
60
  leaderboard_dataset = LEADERBOARD
61
 
 
 
 
 
62
  executable = "giskard_scanner"
63
  try:
64
  # Copy the current requirements (might be changed)
@@ -93,9 +98,9 @@ def prepare_env_and_get_command(
93
  "--label_mapping",
94
  json.dumps(label_mapping),
95
  "--scan_config",
96
- get_submitted_yaml_path(uid),
97
  "--inference_type",
98
- "hf_inference_api",
99
  "--inference_api_token",
100
  inference_token,
101
  ]
 
17
  HF_SPACE_ID,
18
  HF_WRITE_TOKEN,
19
  )
20
+ from utils.io_utils import LOG_FILE, get_yaml_path, write_log_to_user_file
21
  from isolated_env import prepare_venv
22
  from utils.leaderboard import LEADERBOARD
23
 
 
50
  d_id,
51
  config,
52
  split,
53
+ inference,
54
  inference_token,
55
  uid,
56
  label_mapping,
 
60
  if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
61
  leaderboard_dataset = LEADERBOARD
62
 
63
+ inference_type = "hf_pipeline"
64
+ if inference and inference_token:
65
+ inference_type = "hf_inference_api"
66
+
67
  executable = "giskard_scanner"
68
  try:
69
  # Copy the current requirements (might be changed)
 
98
  "--label_mapping",
99
  json.dumps(label_mapping),
100
  "--scan_config",
101
+ get_yaml_path(uid),
102
  "--inference_type",
103
+ inference_type,
104
  "--inference_api_token",
105
  inference_token,
106
  ]
utils/ui_helpers.py CHANGED
@@ -7,15 +7,10 @@ import datasets
7
  import gradio as gr
8
  import pandas as pd
9
 
10
- import leaderboard
11
- from io_utils import (
12
- read_column_mapping,
13
- write_column_mapping,
14
- read_scanners,
15
- write_scanners,
16
- )
17
- from run_jobs import save_job_to_pipe
18
- from text_classification import (
19
  strip_model_id_from_url,
20
  check_model_task,
21
  preload_hf_inference_api,
@@ -31,11 +26,10 @@ from utils.wordings import (
31
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
32
  UNMATCHED_MODEL_DATASET_STYLED_ERROR,
33
  CHECK_LOG_SECTION_RAW,
34
- VALIDATED_MODEL_DATASET_STYLED,
35
  get_dataset_fetch_error_raw,
36
  )
37
  import os
38
- from app_env import HF_WRITE_TOKEN
39
 
40
  MAX_LABELS = 40
41
  MAX_FEATURES = 20
@@ -53,20 +47,9 @@ def get_related_datasets_from_leaderboard(model_id):
53
  datasets_unique = list(model_records["dataset_id"].unique())
54
 
55
  if len(datasets_unique) == 0:
56
- return gr.update(choices=[])
57
 
58
- return gr.update(choices=datasets_unique)
59
-
60
-
61
- logger = logging.getLogger(__file__)
62
-
63
- def get_dataset_splits(dataset_id, dataset_config):
64
- try:
65
- splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
66
- return gr.update(choices=splits, value=splits[0], visible=True)
67
- except Exception as e:
68
- logger.warn(f"Check your dataset {dataset_id} and config {dataset_config}: {e}")
69
- return gr.update(visible=False)
70
 
71
  def check_dataset(dataset_id):
72
  logger.info(f"Loading {dataset_id}")
@@ -78,7 +61,9 @@ def check_dataset(dataset_id):
78
  gr.update(visible=False),
79
  ""
80
  )
81
- splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
 
 
82
  return (
83
  gr.update(choices=configs, value=configs[0], visible=True),
84
  gr.update(choices=splits, value=splits[0], visible=True),
@@ -140,7 +125,7 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
140
  ds_labels = list(shared_labels)
141
  if len(ds_labels) > MAX_LABELS:
142
  ds_labels = ds_labels[:MAX_LABELS]
143
- gr.Warning(f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd.")
144
 
145
  # sort labels to make sure the order is consistent
146
  # prediction gives the order based on probability
@@ -181,67 +166,33 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
181
 
182
  def precheck_model_ds_enable_example_btn(
183
  model_id, dataset_id, dataset_config, dataset_split
184
- ):
 
 
185
  model_id = strip_model_id_from_url(model_id)
186
  model_task = check_model_task(model_id)
187
  preload_hf_inference_api(model_id)
188
-
 
 
 
189
  if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
190
- return (
191
- gr.update(interactive=False),
192
- gr.update(visible=False),
193
- gr.update(visible=False),
194
- gr.update(visible=False),
195
- gr.update(visible=False),
196
- gr.update(visible=False),
197
- )
198
-
199
  try:
200
  ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
201
  df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
202
- ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds[dataset_split])
203
-
204
- if model_task is None or model_task != "text-classification":
205
- gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
206
- return (
207
- gr.update(interactive=False),
208
- gr.update(value=df, visible=True),
209
- gr.update(visible=False),
210
- gr.update(visible=False),
211
- gr.update(visible=False),
212
- gr.update(visible=False),
213
- )
214
 
215
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
216
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
217
- return (
218
- gr.update(interactive=False),
219
- gr.update(value=df, visible=True),
220
- gr.update(visible=False),
221
- gr.update(visible=False),
222
- gr.update(visible=False),
223
- gr.update(visible=False),
224
- )
225
 
226
- return (
227
- gr.update(interactive=True),
228
- gr.update(value=df, visible=True),
229
- gr.update(visible=False),
230
- gr.update(visible=False),
231
- gr.update(visible=False),
232
- gr.update(visible=False),
233
- )
234
  except Exception as e:
235
  # Config or split wrong
236
  logger.warn(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
237
- return (
238
- gr.update(interactive=False),
239
- gr.update(visible=False),
240
- gr.update(visible=False),
241
- gr.update(visible=False),
242
- gr.update(visible=False),
243
- gr.update(visible=False),
244
- )
245
 
246
 
247
  def align_columns_and_show_prediction(
@@ -250,8 +201,8 @@ def align_columns_and_show_prediction(
250
  dataset_config,
251
  dataset_split,
252
  uid,
253
- profile: gr.OAuthProfile | None,
254
- oauth_token: gr.OAuthToken | None,
255
  ):
256
  model_id = strip_model_id_from_url(model_id)
257
  model_task = check_model_task(model_id)
@@ -270,7 +221,7 @@ def align_columns_and_show_prediction(
270
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
271
  ]
272
 
273
- hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
274
 
275
  prediction_input, prediction_response = get_example_prediction(
276
  model_id, dataset_id, dataset_config, dataset_split, hf_token
@@ -278,7 +229,6 @@ def align_columns_and_show_prediction(
278
 
279
  if prediction_input is None or prediction_response is None:
280
  return (
281
- gr.update(visible=False),
282
  gr.update(visible=False),
283
  gr.update(visible=False),
284
  gr.update(visible=False, open=False),
@@ -289,7 +239,6 @@ def align_columns_and_show_prediction(
289
 
290
  if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
291
  return (
292
- gr.update(visible=False),
293
  gr.update(visible=False),
294
  gr.update(visible=False),
295
  gr.update(visible=False, open=False),
@@ -301,13 +250,12 @@ def align_columns_and_show_prediction(
301
  model_labels = list(prediction_response.keys())
302
 
303
  ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
304
- ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)
305
 
306
  # when dataset does not have labels or features
307
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
308
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
309
  return (
310
- gr.update(visible=False),
311
  gr.update(visible=False),
312
  gr.update(visible=False),
313
  gr.update(visible=False, open=False),
@@ -320,7 +268,6 @@ def align_columns_and_show_prediction(
320
  return (
321
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
322
  gr.update(visible=False),
323
- gr.update(visible=False),
324
  gr.update(visible=False, open=False),
325
  gr.update(interactive=False),
326
  "",
@@ -342,20 +289,18 @@ def align_columns_and_show_prediction(
342
  ):
343
  return (
344
  gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
345
- gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
346
- gr.update(value=prediction_response, visible=True),
347
  gr.update(visible=True, open=True),
348
- gr.update(interactive=(profile is not None and oauth_token is not None)),
349
  "",
350
  *column_mappings,
351
  )
352
 
353
  return (
354
- gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
355
- gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
356
  gr.update(value=prediction_response, visible=True),
357
  gr.update(visible=True, open=False),
358
- gr.update(interactive=(profile is not None and oauth_token is not None)),
359
  "",
360
  *column_mappings,
361
  )
@@ -363,20 +308,18 @@ def align_columns_and_show_prediction(
363
 
364
  def check_column_mapping_keys_validity(all_mappings):
365
  if all_mappings is None:
366
- logger.warning("all_mapping is None")
367
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
368
  return False
369
 
370
  if "labels" not in all_mappings.keys():
371
- logger.warning(f"Label mapping is not valid, all_mappings: {all_mappings}")
372
  return False
373
 
374
  return True
375
 
376
- def enable_run_btn(uid, model_id, dataset_id, dataset_config, dataset_split, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
377
- if profile is None:
378
- return gr.update(interactive=False)
379
- if oath_token is None:
380
  return gr.update(interactive=False)
381
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
382
  logger.warn("Model id or dataset id is not selected")
@@ -387,27 +330,26 @@ def enable_run_btn(uid, model_id, dataset_id, dataset_config, dataset_split, pro
387
  logger.warn("Column mapping is not valid")
388
  return gr.update(interactive=False)
389
 
390
- def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys=None):
 
 
 
 
 
391
  label_mapping = {}
392
  if len(all_mappings["labels"].keys()) != len(ds_labels):
393
- logger.warn(f"""Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
394
- \nall_mappings: {all_mappings}\nds_labels: {ds_labels}""")
395
 
396
  if len(all_mappings["features"].keys()) != len(ds_features):
397
- logger.warn(f"""Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
398
- \nall_mappings: {all_mappings}\nds_features: {ds_features}""")
399
 
400
  for i, label in zip(range(len(ds_labels)), ds_labels):
401
  # align the saved labels with dataset labels order
402
  label_mapping.update({str(i): all_mappings["labels"][label]})
403
 
404
  if "features" not in all_mappings.keys():
405
- logger.warning("features not in all_mappings")
406
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
407
-
408
  feature_mapping = all_mappings["features"]
409
- if len(label_keys) > 0:
410
- feature_mapping.update({"label": label_keys[0]})
411
  return label_mapping, feature_mapping
412
 
413
  def show_hf_token_info(token):
@@ -416,18 +358,16 @@ def show_hf_token_info(token):
416
  return gr.update(visible=True)
417
  return gr.update(visible=False)
418
 
419
- def try_submit(m_id, d_id, config, split, uid, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
420
- print(oath_token.token)
421
- print(".>>>>>>>>>>>>>>>>>>>>>>")
422
  all_mappings = read_column_mapping(uid)
423
  if not check_column_mapping_keys_validity(all_mappings):
424
  return (gr.update(interactive=True), gr.update(visible=False))
425
 
426
  # get ds labels and features again for alignment
427
  ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
428
- ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
429
- label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys)
430
-
431
  eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
432
  save_job_to_pipe(
433
  uid,
@@ -436,7 +376,8 @@ def try_submit(m_id, d_id, config, split, uid, profile: gr.OAuthProfile | None,
436
  d_id,
437
  config,
438
  split,
439
- oath_token.token,
 
440
  uid,
441
  label_mapping,
442
  feature_mapping,
@@ -446,16 +387,8 @@ def try_submit(m_id, d_id, config, split, uid, profile: gr.OAuthProfile | None,
446
  )
447
  gr.Info("Your evaluation has been submitted")
448
 
449
- new_uid = uuid.uuid4()
450
- scanners = read_scanners(uid)
451
- write_scanners(scanners, new_uid)
452
-
453
  return (
454
  gr.update(interactive=False), # Submit button
455
  gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
456
- new_uid, # Allocate a new uuid
457
- gr.update(visible=False),
458
- gr.update(visible=False),
459
- gr.update(visible=False),
460
- gr.update(visible=False),
461
  )
 
7
  import gradio as gr
8
  import pandas as pd
9
 
10
+ import utils.leaderboard as leaderboard
11
+ from utils.io_utils import read_column_mapping, write_column_mapping
12
+ from utils.run_jobs import save_job_to_pipe
13
+ from utils.text_classification import (
 
 
 
 
 
14
  strip_model_id_from_url,
15
  check_model_task,
16
  preload_hf_inference_api,
 
26
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
27
  UNMATCHED_MODEL_DATASET_STYLED_ERROR,
28
  CHECK_LOG_SECTION_RAW,
29
+ get_styled_input,
30
  get_dataset_fetch_error_raw,
31
  )
32
  import os
 
33
 
34
  MAX_LABELS = 40
35
  MAX_FEATURES = 20
 
47
  datasets_unique = list(model_records["dataset_id"].unique())
48
 
49
  if len(datasets_unique) == 0:
50
+ return gr.update(choices=[], value="")
51
 
52
+ return gr.update(choices=datasets_unique, value="")
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def check_dataset(dataset_id):
55
  logger.info(f"Loading {dataset_id}")
 
61
  gr.update(visible=False),
62
  ""
63
  )
64
+ splits = datasets.get_dataset_split_names(
65
+ dataset_id, configs[0], trust_remote_code=True
66
+ )
67
  return (
68
  gr.update(choices=configs, value=configs[0], visible=True),
69
  gr.update(choices=splits, value=splits[0], visible=True),
 
125
  ds_labels = list(shared_labels)
126
  if len(ds_labels) > MAX_LABELS:
127
  ds_labels = ds_labels[:MAX_LABELS]
128
+ gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")
129
 
130
  # sort labels to make sure the order is consistent
131
  # prediction gives the order based on probability
 
166
 
167
  def precheck_model_ds_enable_example_btn(
168
  model_id, dataset_id, dataset_config, dataset_split
169
+ ):
170
+ if model_id == "" or dataset_id == "":
171
+ return (gr.update(interactive=False), gr.update(visible=False), "")
172
  model_id = strip_model_id_from_url(model_id)
173
  model_task = check_model_task(model_id)
174
  preload_hf_inference_api(model_id)
175
+ if model_task is None or model_task != "text-classification":
176
+ gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
177
+ return (gr.update(interactive=False), gr.update(visible=False), "")
178
+
179
  if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
180
+ return (gr.update(interactive=False), gr.update(visible=False), "")
181
+
 
 
 
 
 
 
 
182
  try:
183
  ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
184
  df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
185
+ ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
188
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
189
+ return (gr.update(interactive=False), gr.update(value=df, visible=True), "")
 
 
 
 
 
 
 
190
 
191
+ return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
 
 
 
 
 
 
 
192
  except Exception as e:
193
  # Config or split wrong
194
  logger.warn(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
195
+ return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
 
 
 
 
 
 
 
196
 
197
 
198
  def align_columns_and_show_prediction(
 
201
  dataset_config,
202
  dataset_split,
203
  uid,
204
+ run_inference,
205
+ inference_token,
206
  ):
207
  model_id = strip_model_id_from_url(model_id)
208
  model_task = check_model_task(model_id)
 
221
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
222
  ]
223
 
224
+ hf_token = os.environ.get("HF_WRITE_TOKEN", default="")
225
 
226
  prediction_input, prediction_response = get_example_prediction(
227
  model_id, dataset_id, dataset_config, dataset_split, hf_token
 
229
 
230
  if prediction_input is None or prediction_response is None:
231
  return (
 
232
  gr.update(visible=False),
233
  gr.update(visible=False),
234
  gr.update(visible=False, open=False),
 
239
 
240
  if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
241
  return (
 
242
  gr.update(visible=False),
243
  gr.update(visible=False),
244
  gr.update(visible=False, open=False),
 
250
  model_labels = list(prediction_response.keys())
251
 
252
  ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
253
+ ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
254
 
255
  # when dataset does not have labels or features
256
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
257
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
258
  return (
 
259
  gr.update(visible=False),
260
  gr.update(visible=False),
261
  gr.update(visible=False, open=False),
 
268
  return (
269
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
270
  gr.update(visible=False),
 
271
  gr.update(visible=False, open=False),
272
  gr.update(interactive=False),
273
  "",
 
289
  ):
290
  return (
291
  gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
292
+ gr.update(visible=False),
 
293
  gr.update(visible=True, open=True),
294
+ gr.update(interactive=(run_inference and inference_token != "")),
295
  "",
296
  *column_mappings,
297
  )
298
 
299
  return (
300
+ gr.update(value=get_styled_input(prediction_input), visible=True),
 
301
  gr.update(value=prediction_response, visible=True),
302
  gr.update(visible=True, open=False),
303
+ gr.update(interactive=(run_inference and inference_token != "")),
304
  "",
305
  *column_mappings,
306
  )
 
308
 
309
  def check_column_mapping_keys_validity(all_mappings):
310
  if all_mappings is None:
 
311
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
312
  return False
313
 
314
  if "labels" not in all_mappings.keys():
315
+ gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
316
  return False
317
 
318
  return True
319
 
320
+ def enable_run_btn(uid, run_inference, inference_token, model_id, dataset_id, dataset_config, dataset_split):
321
+ if not run_inference or inference_token == "":
322
+ logger.warn("Inference API is not enabled")
 
323
  return gr.update(interactive=False)
324
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
325
  logger.warn("Model id or dataset id is not selected")
 
330
  logger.warn("Column mapping is not valid")
331
  return gr.update(interactive=False)
332
 
333
+ if not check_hf_token_validity(inference_token):
334
+ logger.warn("HF token is not valid")
335
+ return gr.update(interactive=False)
336
+ return gr.update(interactive=True)
337
+
338
+ def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
339
  label_mapping = {}
340
  if len(all_mappings["labels"].keys()) != len(ds_labels):
341
+ logger.warn("Label mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
342
 
343
  if len(all_mappings["features"].keys()) != len(ds_features):
344
+ logger.warn("Feature mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
345
 
346
  for i, label in zip(range(len(ds_labels)), ds_labels):
347
  # align the saved labels with dataset labels order
348
  label_mapping.update({str(i): all_mappings["labels"][label]})
349
 
350
  if "features" not in all_mappings.keys():
 
351
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
352
  feature_mapping = all_mappings["features"]
 
 
353
  return label_mapping, feature_mapping
354
 
355
  def show_hf_token_info(token):
 
358
  return gr.update(visible=True)
359
  return gr.update(visible=False)
360
 
361
+ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
 
 
362
  all_mappings = read_column_mapping(uid)
363
  if not check_column_mapping_keys_validity(all_mappings):
364
  return (gr.update(interactive=True), gr.update(visible=False))
365
 
366
  # get ds labels and features again for alignment
367
  ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
368
+ ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
369
+ label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
370
+
371
  eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
372
  save_job_to_pipe(
373
  uid,
 
376
  d_id,
377
  config,
378
  split,
379
+ inference,
380
+ inference_token,
381
  uid,
382
  label_mapping,
383
  feature_mapping,
 
387
  )
388
  gr.Info("Your evaluation has been submitted")
389
 
 
 
 
 
390
  return (
391
  gr.update(interactive=False), # Submit button
392
  gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
393
+ uuid.uuid4(), # Allocate a new uuid
 
 
 
 
394
  )
utils/wordings.py CHANGED
@@ -2,24 +2,23 @@ INTRODUCTION_MD = """
2
  <h1 style="text-align: center;">
3
   🐢Giskard Evaluator - Text Classification
4
  </h1>
5
- Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
6
- You can also checkout our library documentation <a href="https://docs.giskard.ai/en/latest/getting_started/quickstart/index.html">here</a>.
7
  """
8
  CONFIRM_MAPPING_DETAILS_MD = """
9
  <h1 style="text-align: center;">
10
  Confirm Pre-processing Details
11
  </h1>
12
- Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model. You can select the output variable's labels from the dropdowns below.
13
  """
14
  CONFIRM_MAPPING_DETAILS_FAIL_MD = """
15
  <h1 style="text-align: center;">
16
  Confirm Pre-processing Details
17
  </h1>
18
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
19
  """
20
 
21
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
22
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
23
  """
24
 
25
  CHECK_CONFIG_OR_SPLIT_RAW = """
@@ -39,7 +38,7 @@ PREDICTION_SAMPLE_MD = """
39
 
40
  MAPPING_STYLED_ERROR_WARNING = """
41
  <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
42
- ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
43
  </h3>
44
  """
45
 
@@ -58,11 +57,7 @@ USE_INFERENCE_API_TIP = """
58
  <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
59
  Hugging Face Inference API
60
  </a>
61
- . Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so. You can find it <a href="https://huggingface.co/settings/tokens">here</a>.
62
- """
63
-
64
- LOG_IN_TIPS = """
65
- To use the Hugging Face Inference API, you need to log in to your Hugging Face account.
66
  """
67
 
68
  HF_TOKEN_INVALID_STYLED= """
@@ -71,10 +66,10 @@ HF_TOKEN_INVALID_STYLED= """
71
  </p>
72
  """
73
 
74
- VALIDATED_MODEL_DATASET_STYLED = """
75
- <h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
76
- Your model and dataset have been validated!
77
- </h3>"""
78
-
79
  def get_dataset_fetch_error_raw(error):
80
  return f"""Sorry you cannot use this dataset because {error}. Contact HF team to support this dataset."""
 
 
 
 
 
 
2
  <h1 style="text-align: center;">
3
   🐢Giskard Evaluator - Text Classification
4
  </h1>
5
+ Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
 
6
  """
7
  CONFIRM_MAPPING_DETAILS_MD = """
8
  <h1 style="text-align: center;">
9
  Confirm Pre-processing Details
10
  </h1>
11
+ Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model.
12
  """
13
  CONFIRM_MAPPING_DETAILS_FAIL_MD = """
14
  <h1 style="text-align: center;">
15
  Confirm Pre-processing Details
16
  </h1>
17
+ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
18
  """
19
 
20
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
21
+ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
22
  """
23
 
24
  CHECK_CONFIG_OR_SPLIT_RAW = """
 
38
 
39
  MAPPING_STYLED_ERROR_WARNING = """
40
  <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
41
+ ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
42
  </h3>
43
  """
44
 
 
57
  <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
58
  Hugging Face Inference API
59
  </a>
60
+ . Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so.
 
 
 
 
61
  """
62
 
63
  HF_TOKEN_INVALID_STYLED= """
 
66
  </p>
67
  """
68
 
 
 
 
 
 
69
  def get_dataset_fetch_error_raw(error):
70
  return f"""Sorry you cannot use this dataset because {error}. Contact HF team to support this dataset."""
71
+
72
+ def get_styled_input(input):
73
+ return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
74
+ Your model and dataset have been validated! <br /> Sample input: {input}
75
+ </h3>"""
wordings.py DELETED
@@ -1,80 +0,0 @@
1
- INTRODUCTION_MD = """
2
- <div style="display: flex; justify-content: center;"><h1>
3
- 🐒Giskard Evaluator - Text Classification
4
- </h1></div>
5
- Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
6
- You can also check out our library documentation <a href="https://docs.giskard.ai/en/latest/getting_started/quickstart/index.html">here</a>.
7
- """
8
- CONFIRM_MAPPING_DETAILS_MD = """
9
- <h1 style="text-align: center;">
10
- Confirm Pre-processing Details
11
- </h1>
12
- Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model. You can select the output variable's labels from the dropdowns below.
13
- """
14
- CONFIRM_MAPPING_DETAILS_FAIL_MD = """
15
- <h1 style="text-align: center;">
16
- Confirm Pre-processing Details
17
- </h1>
18
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
19
- """
20
-
21
- CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
22
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
23
- """
24
-
25
- CHECK_CONFIG_OR_SPLIT_RAW = """
26
- Please check your dataset config or split.
27
- """
28
-
29
- CHECK_LOG_SECTION_RAW = """
30
- You have successfully submitted a Giskard evaluation. Further details are available in the Logs tab. Your report will be posted to your model's community discussion.
31
- """
32
-
33
- PREDICTION_SAMPLE_MD = """
34
- <h1 style="text-align: center;">
35
- Model Prediction Sample
36
- </h1>
37
- Here's a sample of your model's prediction on an example from the dataset.
38
- """
39
-
40
- MAPPING_STYLED_ERROR_WARNING = """
41
- <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
42
- ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
43
- </h3>
44
- """
45
-
46
- UNMATCHED_MODEL_DATASET_STYLED_ERROR = """
47
- <h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
48
- ❌ Your model and dataset have different numbers of labels. Please double check your model and dataset.
49
- </h3>
50
- """
51
-
52
- NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
53
- Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
54
- """
55
-
56
- USE_INFERENCE_API_TIP = """
57
- To speed up the evaluation, we recommend using the
58
- <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
59
- Hugging Face Inference API
60
- </a>
61
- . Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so. You can find it <a href="https://huggingface.co/settings/tokens">here</a>.
62
- """
63
-
64
- LOG_IN_TIPS = """
65
- To use the Hugging Face Inference API, you need to log in to your Hugging Face account.
66
- """
67
-
68
- HF_TOKEN_INVALID_STYLED= """
69
- <p style="text-align: left;color: red; ">
70
- Your Hugging Face token is invalid. Please double check your token.
71
- </p>
72
- """
73
-
74
- VALIDATED_MODEL_DATASET_STYLED = """
75
- <h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
76
- Your model and dataset have been validated!
77
- </h3>"""
78
-
79
- def get_dataset_fetch_error_raw(error):
80
- return f"""Sorry you cannot use this dataset because {error}. Contact HF team to support this dataset."""