inoki-giskard committed on
Commit
58c39e0
·
1 Parent(s): e84a865

Format code and imports

Browse files
app.py CHANGED
@@ -1,9 +1,10 @@
1
  import atexit
 
2
  import gradio as gr
3
 
 
4
  from app_leaderboard import get_demo as get_demo_leaderboard
5
  from app_text_classification import get_demo as get_demo_text_classification
6
- from app_debug import get_demo as get_demo_debug
7
  from run_jobs import start_process_run_job, stop_thread
8
 
9
  try:
 
1
  import atexit
2
+
3
  import gradio as gr
4
 
5
+ from app_debug import get_demo as get_demo_debug
6
  from app_leaderboard import get_demo as get_demo_leaderboard
7
  from app_text_classification import get_demo as get_demo_text_classification
 
8
  from run_jobs import start_process_run_job, stop_thread
9
 
10
  try:
app_debug.py CHANGED
@@ -1,10 +1,14 @@
1
- import gradio as gr
2
- import pipe
3
  from os import listdir
4
  from os.path import isfile, join
 
 
 
 
 
5
  LOG_PATH = "./tmp"
6
  CONFIG_PATH = "./cicd/configs"
7
 
 
8
  def get_accordions_of_files(path, files):
9
  components = []
10
  for file in files:
@@ -15,14 +19,23 @@ def get_accordions_of_files(path, files):
15
  gr.Markdown(f.read())
16
  return components
17
 
 
18
  def get_accordions_of_log_files():
19
- log_files = [f for f in listdir(LOG_PATH) if isfile(join(LOG_PATH, f)) and f.endswith("_log")]
 
 
20
  return get_accordions_of_files(LOG_PATH, log_files)
21
 
 
22
  def get_accordions_of_config_files():
23
- config_files = [f for f in listdir(CONFIG_PATH) if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")]
 
 
 
 
24
  return get_accordions_of_files(CONFIG_PATH, config_files)
25
 
 
26
  def get_demo(demo):
27
  with gr.Row():
28
  # check if jobs is an attribute of pipe
@@ -34,4 +47,3 @@ def get_demo(demo):
34
  with gr.Accordion(label="Log Files", open=False):
35
  log_accordions = get_accordions_of_log_files()
36
  demo.load(get_accordions_of_log_files, outputs=log_accordions, every=1)
37
-
 
 
 
1
  from os import listdir
2
  from os.path import isfile, join
3
+
4
+ import gradio as gr
5
+
6
+ import pipe
7
+
8
  LOG_PATH = "./tmp"
9
  CONFIG_PATH = "./cicd/configs"
10
 
11
+
12
  def get_accordions_of_files(path, files):
13
  components = []
14
  for file in files:
 
19
  gr.Markdown(f.read())
20
  return components
21
 
22
+
23
  def get_accordions_of_log_files():
24
+ log_files = [
25
+ f for f in listdir(LOG_PATH) if isfile(join(LOG_PATH, f)) and f.endswith("_log")
26
+ ]
27
  return get_accordions_of_files(LOG_PATH, log_files)
28
 
29
+
30
  def get_accordions_of_config_files():
31
+ config_files = [
32
+ f
33
+ for f in listdir(CONFIG_PATH)
34
+ if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")
35
+ ]
36
  return get_accordions_of_files(CONFIG_PATH, config_files)
37
 
38
+
39
  def get_demo(demo):
40
  with gr.Row():
41
  # check if jobs is an attribute of pipe
 
47
  with gr.Accordion(label="Log Files", open=False):
48
  log_accordions = get_accordions_of_log_files()
49
  demo.load(get_accordions_of_log_files, outputs=log_accordions, every=1)
 
app_leaderboard.py CHANGED
@@ -3,7 +3,8 @@ import logging
3
  import datasets
4
  import gradio as gr
5
 
6
- from fetch_utils import check_dataset_and_get_config, check_dataset_and_get_split
 
7
 
8
 
9
  def get_records_from_dataset_repo(dataset_id):
@@ -94,7 +95,10 @@ def get_demo():
94
  label="Model id", choices=model_ids, value=model_ids[0], interactive=True
95
  )
96
  dataset_select = gr.Dropdown(
97
- label="Dataset id", choices=dataset_ids, value=dataset_ids[0], interactive=True
 
 
 
98
  )
99
 
100
  with gr.Row():
@@ -123,9 +127,9 @@ def get_demo():
123
  df = records[(records["task"] == task)]
124
  # filter the table based on the model_id and dataset_id
125
  if model_id and model_id != "Any":
126
- df = df[(df['model_id'] == model_id)]
127
  if dataset_id and dataset_id != "Any":
128
- df = df[(df['dataset_id'] == dataset_id)]
129
 
130
  # filter the table based on the columns
131
  df = df[columns]
 
3
  import datasets
4
  import gradio as gr
5
 
6
+ from fetch_utils import (check_dataset_and_get_config,
7
+ check_dataset_and_get_split)
8
 
9
 
10
  def get_records_from_dataset_repo(dataset_id):
 
95
  label="Model id", choices=model_ids, value=model_ids[0], interactive=True
96
  )
97
  dataset_select = gr.Dropdown(
98
+ label="Dataset id",
99
+ choices=dataset_ids,
100
+ value=dataset_ids[0],
101
+ interactive=True,
102
  )
103
 
104
  with gr.Row():
 
127
  df = records[(records["task"] == task)]
128
  # filter the table based on the model_id and dataset_id
129
  if model_id and model_id != "Any":
130
+ df = df[(df["model_id"] == model_id)]
131
  if dataset_id and dataset_id != "Any":
132
+ df = df[(df["dataset_id"] == dataset_id)]
133
 
134
  # filter the table based on the columns
135
  df = df[columns]
app_text_classification.py CHANGED
@@ -1,22 +1,16 @@
1
- import gradio as gr
2
  import uuid
3
- from io_utils import (
4
- read_scanners,
5
- write_scanners,
6
- read_inference_type,
7
- get_logs_file,
8
- write_inference_type,
9
- )
10
- from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD
11
- from text_classification_ui_helpers import (
12
- try_submit,
13
- check_dataset_and_get_config,
14
- check_dataset_and_get_split,
15
- check_model_and_show_prediction,
16
- write_column_mapping_to_config,
17
- select_run_mode,
18
- deselect_run_inference,
19
- )
20
 
21
  MAX_LABELS = 20
22
  MAX_FEATURES = 20
@@ -70,17 +64,24 @@ def get_demo(demo):
70
  with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
71
  run_local = gr.Checkbox(value=True, label="Run in this Space")
72
  run_inference = gr.Checkbox(value="False", label="Run with Inference API")
 
73
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
74
  def get_run_mode(uid):
75
- return (
76
- gr.update(value=read_inference_type(uid) == "hf_inference_api" and not run_local.value)
 
77
  )
78
- inference_token = gr.Textbox(value="", label="HF Token for Inference API", visible=False, interactive=True)
79
 
80
- with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
81
- scanners = gr.CheckboxGroup(
82
- label="Scan Settings", visible=True
 
 
83
  )
 
 
 
 
84
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
85
  def get_scanners(uid):
86
  selected = read_scanners(uid)
@@ -88,9 +89,9 @@ def get_demo(demo):
88
  # Reason: data_leakage barely raises any issues and takes too many requests
89
  # when using inference API, causing rate limit error
90
  scan_config = selected + ["data_leakage"]
91
- return (gr.update(
92
- choices=scan_config, value=selected, label="Scan Settings", visible=True
93
- ))
94
 
95
  with gr.Row():
96
  run_btn = gr.Button(
@@ -105,8 +106,9 @@ def get_demo(demo):
105
  demo.load(get_logs_file, uid_label, logs, every=0.5)
106
 
107
  dataset_id_input.change(
108
- check_dataset_and_get_config,
109
- inputs=[dataset_id_input], outputs=[dataset_config_input]
 
110
  )
111
 
112
  dataset_config_input.change(
@@ -118,19 +120,21 @@ def get_demo(demo):
118
  scanners.change(write_scanners, inputs=[scanners, uid_label])
119
 
120
  run_inference.change(
121
- select_run_mode,
122
- inputs=[run_inference, inference_token, uid_label],
123
- outputs=[inference_token, run_local])
124
-
 
125
  run_local.change(
126
- deselect_run_inference,
127
- inputs=[run_local],
128
- outputs=[inference_token, run_inference])
129
-
 
130
  inference_token.change(
131
- write_inference_type,
132
- inputs=[run_inference, inference_token, uid_label])
133
-
134
  gr.on(
135
  triggers=[label.change for label in column_mappings],
136
  fn=write_column_mapping_to_config,
 
 
1
  import uuid
2
+
3
+ import gradio as gr
4
+
5
+ from io_utils import (get_logs_file, read_inference_type, read_scanners,
6
+ write_inference_type, write_scanners)
7
+ from text_classification_ui_helpers import (check_dataset_and_get_config,
8
+ check_dataset_and_get_split,
9
+ check_model_and_show_prediction,
10
+ deselect_run_inference,
11
+ select_run_mode, try_submit,
12
+ write_column_mapping_to_config)
13
+ from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
 
 
 
 
 
14
 
15
  MAX_LABELS = 20
16
  MAX_FEATURES = 20
 
64
  with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
65
  run_local = gr.Checkbox(value=True, label="Run in this Space")
66
  run_inference = gr.Checkbox(value="False", label="Run with Inference API")
67
+
68
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
69
  def get_run_mode(uid):
70
+ return gr.update(
71
+ value=read_inference_type(uid) == "hf_inference_api"
72
+ and not run_local.value
73
  )
 
74
 
75
+ inference_token = gr.Textbox(
76
+ value="",
77
+ label="HF Token for Inference API",
78
+ visible=False,
79
+ interactive=True,
80
  )
81
+
82
+ with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
83
+ scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
84
+
85
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
86
  def get_scanners(uid):
87
  selected = read_scanners(uid)
 
89
  # Reason: data_leakage barely raises any issues and takes too many requests
90
  # when using inference API, causing rate limit error
91
  scan_config = selected + ["data_leakage"]
92
+ return gr.update(
93
+ choices=scan_config, value=selected, label="Scan Settings", visible=True
94
+ )
95
 
96
  with gr.Row():
97
  run_btn = gr.Button(
 
106
  demo.load(get_logs_file, uid_label, logs, every=0.5)
107
 
108
  dataset_id_input.change(
109
+ check_dataset_and_get_config,
110
+ inputs=[dataset_id_input],
111
+ outputs=[dataset_config_input],
112
  )
113
 
114
  dataset_config_input.change(
 
120
  scanners.change(write_scanners, inputs=[scanners, uid_label])
121
 
122
  run_inference.change(
123
+ select_run_mode,
124
+ inputs=[run_inference, inference_token, uid_label],
125
+ outputs=[inference_token, run_local],
126
+ )
127
+
128
  run_local.change(
129
+ deselect_run_inference,
130
+ inputs=[run_local],
131
+ outputs=[inference_token, run_inference],
132
+ )
133
+
134
  inference_token.change(
135
+ write_inference_type, inputs=[run_inference, inference_token, uid_label]
136
+ )
137
+
138
  gr.on(
139
  triggers=[label.change for label in column_mappings],
140
  fn=write_column_mapping_to_config,
io_utils.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import subprocess
 
3
  import yaml
4
 
5
  import pipe
@@ -44,7 +45,6 @@ def write_scanners(scanners, uid):
44
  f.close()
45
 
46
 
47
-
48
  # read model_type from yaml file
49
  def read_inference_type(uid):
50
  inference_type = ""
@@ -73,7 +73,6 @@ def write_inference_type(use_inference, inference_token, uid):
73
  f.close()
74
 
75
 
76
-
77
  # read column mapping from yaml file
78
  def read_column_mapping(uid):
79
  column_mapping = {}
@@ -103,7 +102,6 @@ def write_column_mapping(mapping, uid):
103
  f.close()
104
 
105
 
106
-
107
  # convert column mapping dataframe to json
108
  def convert_column_mapping_to_json(df, label=""):
109
  column_mapping = {}
 
1
  import os
2
  import subprocess
3
+
4
  import yaml
5
 
6
  import pipe
 
45
  f.close()
46
 
47
 
 
48
  # read model_type from yaml file
49
  def read_inference_type(uid):
50
  inference_type = ""
 
73
  f.close()
74
 
75
 
 
76
  # read column mapping from yaml file
77
  def read_column_mapping(uid):
78
  column_mapping = {}
 
102
  f.close()
103
 
104
 
 
105
  # convert column mapping dataframe to json
106
  def convert_column_mapping_to_json(df, label=""):
107
  column_mapping = {}
pipe.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  def init():
3
  global jobs
4
- jobs = list()
 
 
1
  def init():
2
  global jobs
3
+ jobs = list()
run_jobs.py CHANGED
@@ -1,6 +1,7 @@
 
1
  import threading
2
  import time
3
- import logging
4
  import pipe
5
  from io_utils import pop_job_from_pipe
6
 
 
1
+ import logging
2
  import threading
3
  import time
4
+
5
  import pipe
6
  from io_utils import pop_job_from_pipe
7
 
text_classification.py CHANGED
@@ -9,9 +9,9 @@ from transformers import pipeline
9
 
10
  def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
11
  if not dataset_config:
12
- dataset_config = 'default'
13
  if not split:
14
- split = 'train'
15
  try:
16
  ds = datasets.load_dataset(dataset_id, dataset_config)[split]
17
  dataset_features = ds.features
@@ -54,7 +54,7 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
54
  continue
55
  if len(feature.names) != len(id2label_mapping.keys()):
56
  continue
57
-
58
  dataset_labels = feature.names
59
  # Try to match labels
60
  for label in feature.names:
 
9
 
10
  def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
11
  if not dataset_config:
12
+ dataset_config = "default"
13
  if not split:
14
+ split = "train"
15
  try:
16
  ds = datasets.load_dataset(dataset_id, dataset_config)[split]
17
  dataset_features = ds.features
 
54
  continue
55
  if len(feature.names) != len(id2label_mapping.keys()):
56
  continue
57
+
58
  dataset_labels = feature.names
59
  # Try to match labels
60
  for label in feature.names:
text_classification_ui_helpers.py CHANGED
@@ -9,11 +9,13 @@ import gradio as gr
9
  from transformers.pipelines import TextClassificationPipeline
10
 
11
  from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
12
- write_column_mapping, write_log_to_user_file,
13
- write_inference_type)
14
  from text_classification import (check_model, get_example_prediction,
15
  get_labels_and_features_from_dataset)
16
- from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW, MAPPING_STYLED_ERROR_WARNING, CHECK_CONFIG_OR_SPLIT_RAW
 
 
17
 
18
  MAX_LABELS = 20
19
  MAX_FEATURES = 20
@@ -42,30 +44,23 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
42
  # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
43
  pass
44
 
 
45
  def select_run_mode(run_inf, inf_token, uid):
46
  if run_inf:
47
  if len(inf_token) > 0:
48
  write_inference_type(run_inf, inf_token, uid)
49
- return (
50
- gr.update(visible=True),
51
- gr.update(value=False))
52
  else:
53
- return (
54
- gr.update(visible=False),
55
- gr.update(value=True))
56
 
57
  def deselect_run_inference(run_local):
58
  if run_local:
59
- return (
60
- gr.update(visible=False),
61
- gr.update(value=False)
62
- )
63
  else:
64
- return (
65
- gr.update(visible=True),
66
- gr.update(value=True)
67
- )
68
-
69
  def write_column_mapping_to_config(
70
  dataset_id, dataset_config, dataset_split, uid, *labels
71
  ):
@@ -83,7 +78,7 @@ def write_column_mapping_to_config(
83
  all_mappings["labels"] = dict()
84
  for i, label in enumerate(labels[:MAX_LABELS]):
85
  if label:
86
- all_mappings["labels"][label] = ds_labels[i%len(ds_labels)]
87
  if "features" not in all_mappings.keys():
88
  all_mappings["features"] = dict()
89
  for _, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
@@ -204,7 +199,9 @@ def try_submit(m_id, d_id, config, split, local, uid):
204
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
205
  return (gr.update(interactive=True), gr.update(visible=False))
206
  label_mapping = {}
207
- for i, label in zip(range(len(all_mappings["labels"].keys())), all_mappings["labels"].keys()):
 
 
208
  label_mapping.update({str(i): label})
209
 
210
  if "features" not in all_mappings.keys():
 
9
  from transformers.pipelines import TextClassificationPipeline
10
 
11
  from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
12
+ write_column_mapping, write_inference_type,
13
+ write_log_to_user_file)
14
  from text_classification import (check_model, get_example_prediction,
15
  get_labels_and_features_from_dataset)
16
+ from wordings import (CHECK_CONFIG_OR_SPLIT_RAW,
17
+ CONFIRM_MAPPING_DETAILS_FAIL_RAW,
18
+ MAPPING_STYLED_ERROR_WARNING)
19
 
20
  MAX_LABELS = 20
21
  MAX_FEATURES = 20
 
44
  # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
45
  pass
46
 
47
+
48
  def select_run_mode(run_inf, inf_token, uid):
49
  if run_inf:
50
  if len(inf_token) > 0:
51
  write_inference_type(run_inf, inf_token, uid)
52
+ return (gr.update(visible=True), gr.update(value=False))
 
 
53
  else:
54
+ return (gr.update(visible=False), gr.update(value=True))
55
+
 
56
 
57
  def deselect_run_inference(run_local):
58
  if run_local:
59
+ return (gr.update(visible=False), gr.update(value=False))
 
 
 
60
  else:
61
+ return (gr.update(visible=True), gr.update(value=True))
62
+
63
+
 
 
64
  def write_column_mapping_to_config(
65
  dataset_id, dataset_config, dataset_split, uid, *labels
66
  ):
 
78
  all_mappings["labels"] = dict()
79
  for i, label in enumerate(labels[:MAX_LABELS]):
80
  if label:
81
+ all_mappings["labels"][label] = ds_labels[i % len(ds_labels)]
82
  if "features" not in all_mappings.keys():
83
  all_mappings["features"] = dict()
84
  for _, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
 
199
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
200
  return (gr.update(interactive=True), gr.update(visible=False))
201
  label_mapping = {}
202
+ for i, label in zip(
203
+ range(len(all_mappings["labels"].keys())), all_mappings["labels"].keys()
204
+ ):
205
  label_mapping.update({str(i): label})
206
 
207
  if "features" not in all_mappings.keys():
utils.py CHANGED
@@ -1,5 +1,8 @@
1
- import yaml
2
  import sys
 
 
 
 
3
  # read scanners from yaml file
4
  # return a list of scanners
5
  def read_scanners(path):
@@ -9,16 +12,18 @@ def read_scanners(path):
9
  scanners = config.get("detectors", None)
10
  return scanners
11
 
 
12
  # convert a list of scanners to yaml file
13
  def write_scanners(scanners):
14
  with open("./scan_config.yaml", "w") as f:
15
  # save scanners to detectors in yaml
16
  yaml.dump({"detectors": scanners}, f)
17
 
 
18
  # convert column mapping dataframe to json
19
  def convert_column_mapping_to_json(df, label=""):
20
  column_mapping = {}
21
  column_mapping[label] = []
22
  for _, row in df.iterrows():
23
  column_mapping[label].append(row.tolist())
24
- return column_mapping
 
 
1
  import sys
2
+
3
+ import yaml
4
+
5
+
6
  # read scanners from yaml file
7
  # return a list of scanners
8
  def read_scanners(path):
 
12
  scanners = config.get("detectors", None)
13
  return scanners
14
 
15
+
16
  # convert a list of scanners to yaml file
17
  def write_scanners(scanners):
18
  with open("./scan_config.yaml", "w") as f:
19
  # save scanners to detectors in yaml
20
  yaml.dump({"detectors": scanners}, f)
21
 
22
+
23
  # convert column mapping dataframe to json
24
  def convert_column_mapping_to_json(df, label=""):
25
  column_mapping = {}
26
  column_mapping[label] = []
27
  for _, row in df.iterrows():
28
  column_mapping[label].append(row.tolist())
29
+ return column_mapping
validate_queue.py CHANGED
@@ -13,7 +13,6 @@ def sleep_a_while():
13
  return str(seconds)
14
 
15
 
16
-
17
  with gr.Blocks() as iface:
18
  text = gr.Textbox(label="Slept second")
19
 
 
13
  return str(seconds)
14
 
15
 
 
16
  with gr.Blocks() as iface:
17
  text = gr.Textbox(label="Slept second")
18
 
wordings.py CHANGED
@@ -36,4 +36,4 @@ MAPPING_STYLED_ERROR_WARNING = """
36
  <h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
37
  Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
38
  </h3>
39
- """
 
36
  <h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
37
  Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
38
  </h3>
39
+ """