inoki-giskard ZeroCommand committed on
Commit
7f86019
·
verified ·
1 Parent(s): d9ca844

show loading time when inference api request times out (#55)

Browse files

- handle inference api error; fix non-text dataset columns (201d15624bcdeec74b8095fe5fb7e40d44ef85e3)
- cast float to int (6314bb550c6d82730a3bf3d225df782e21a4b98d)
- fix f string (d0be1568e2d52610d30b619ba7f8b11bbc852d85)
- handle hf api response with custom class (6eb580200cb862ad44723cff8370883ce03c9be9)
- update config yaml (98175fe87a123b3f2e124d68da46beff34a94957)
- update config yaml (63a19f026fbece9ce40eb4af359a3a486d2ad56b)


Co-authored-by: zcy <[email protected]>

app_text_classification.py CHANGED
@@ -128,7 +128,11 @@ def get_demo():
128
  fn=get_related_datasets_from_leaderboard,
129
  inputs=[model_id_input],
130
  outputs=[dataset_id_input],
131
- ).then(fn=check_dataset, inputs=[dataset_id_input], outputs=[dataset_config_input, dataset_split_input, loading_status])
 
 
 
 
132
 
133
  gr.on(
134
  triggers=[dataset_id_input.input],
 
128
  fn=get_related_datasets_from_leaderboard,
129
  inputs=[model_id_input],
130
  outputs=[dataset_id_input],
131
+ ).then(
132
+ fn=check_dataset,
133
+ inputs=[dataset_id_input],
134
+ outputs=[dataset_config_input, dataset_split_input, loading_status]
135
+ )
136
 
137
  gr.on(
138
  triggers=[dataset_id_input.input],
config.yaml CHANGED
@@ -1,6 +1,8 @@
1
  configuration:
2
  ethical_bias:
3
- threshold: 0.01
 
 
4
  detectors:
5
  - ethical_bias
6
  - text_perturbation
 
1
  configuration:
2
  ethical_bias:
3
+ threshold: 0.05
4
+ performance:
5
+ alpha: 0.05
6
  detectors:
7
  - ethical_bias
8
  - text_perturbation
text_classification.py CHANGED
@@ -7,12 +7,16 @@ import pandas as pd
7
  from transformers import pipeline
8
  import requests
9
  import os
10
- import time
11
 
 
12
  HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
13
 
14
  logger = logging.getLogger(__file__)
15
 
 
 
 
 
16
 
17
  def get_labels_and_features_from_dataset(ds):
18
  try:
@@ -76,19 +80,18 @@ def hf_inference_api(model_id, hf_token, payload):
76
  )
77
  url = f"{hf_inference_api_endpoint}/models/{model_id}"
78
  headers = {"Authorization": f"Bearer {hf_token}"}
79
- output = {"error": "First attemp"}
80
- attempt = 30
81
- while "error" in output and attempt > 0:
82
- response = requests.post(url, headers=headers, json=payload)
83
- if response.status_code != 200:
84
- logging.error(f"Request to inference API returns {response.status_code}")
85
- try:
86
- return response.json()
87
- except Exception:
88
- logging.error(f"{response.content}")
89
- output = {"error": response.content}
90
- attempt -= 1
91
- time.sleep(2)
92
 
93
  def check_model_pipeline(model_id):
94
  try:
@@ -262,6 +265,12 @@ def check_dataset_features_validity(d_id, config, split):
262
 
263
  return df, dataset_features
264
 
 
 
 
 
 
 
265
 
266
  def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
267
  # get a sample prediction from the model on the dataset
@@ -272,13 +281,21 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
272
  ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
273
  if "text" not in ds.features.keys():
274
  # Dataset does not have text column
275
- prediction_input = ds[0][list(ds.features.keys())[0]]
276
  else:
277
  prediction_input = ds[0]["text"]
278
-
279
  hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
280
  payload = {"inputs": prediction_input, "options": {"use_cache": True}}
281
  results = hf_inference_api(model_id, hf_token, payload)
 
 
 
 
 
 
 
 
282
  while isinstance(results, list):
283
  if isinstance(results[0], dict):
284
  break
@@ -287,8 +304,8 @@ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
287
  f'{result["label"]}': result["score"] for result in results
288
  }
289
  except Exception as e:
290
- # Pipeline prediction failed, need to provide labels
291
- logger.warn(f"Pipeline prediction failed due to {e}")
292
  return prediction_input, None
293
 
294
  return prediction_input, prediction_result
 
7
  from transformers import pipeline
8
  import requests
9
  import os
 
10
 
11
+ logger = logging.getLogger(__name__)
12
  HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
13
 
14
  logger = logging.getLogger(__file__)
15
 
16
+ class HuggingFaceInferenceAPIResponse:
17
+ def __init__(self, message):
18
+ self.message = message
19
+
20
 
21
  def get_labels_and_features_from_dataset(ds):
22
  try:
 
80
  )
81
  url = f"{hf_inference_api_endpoint}/models/{model_id}"
82
  headers = {"Authorization": f"Bearer {hf_token}"}
83
+ response = requests.post(url, headers=headers, json=payload)
84
+ if not hasattr(response, "status_code") or response.status_code != 200:
85
+ logger.warning(f"Request to inference API returns {response}")
86
+ try:
87
+ return response.json()
88
+ except Exception:
89
+ return {"error": response.content}
90
+
91
+ def preload_hf_inference_api(model_id):
92
+ payload = {"inputs": "This is a test", "options": {"use_cache": True, }}
93
+ hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
94
+ hf_inference_api(model_id, hf_token, payload)
 
95
 
96
  def check_model_pipeline(model_id):
97
  try:
 
265
 
266
  return df, dataset_features
267
 
268
+ def select_the_first_string_column(ds):
269
+ for feature in ds.features.keys():
270
+ if isinstance(ds[0][feature], str):
271
+ return feature
272
+ return None
273
+
274
 
275
  def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
276
  # get a sample prediction from the model on the dataset
 
281
  ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
282
  if "text" not in ds.features.keys():
283
  # Dataset does not have text column
284
+ prediction_input = ds[0][select_the_first_string_column(ds)]
285
  else:
286
  prediction_input = ds[0]["text"]
287
+
288
  hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
289
  payload = {"inputs": prediction_input, "options": {"use_cache": True}}
290
  results = hf_inference_api(model_id, hf_token, payload)
291
+
292
+ if isinstance(results, dict) and "error" in results.keys():
293
+ if "estimated_time" in results.keys():
294
+ return prediction_input, HuggingFaceInferenceAPIResponse(
295
+ f"Estimated time: {int(results['estimated_time'])}s. Please try again later.")
296
+ return prediction_input, HuggingFaceInferenceAPIResponse(
297
+ f"Inference Error: {results['error']}.")
298
+
299
  while isinstance(results, list):
300
  if isinstance(results[0], dict):
301
  break
 
304
  f'{result["label"]}': result["score"] for result in results
305
  }
306
  except Exception as e:
307
+ # inference api prediction failed, show the error message
308
+ logger.error(f"Get example prediction failed {e}")
309
  return prediction_input, None
310
 
311
  return prediction_input, prediction_result
text_classification_ui_helpers.py CHANGED
@@ -12,8 +12,10 @@ from io_utils import read_column_mapping, write_column_mapping
12
  from run_jobs import save_job_to_pipe
13
  from text_classification import (
14
  check_model_task,
 
15
  get_example_prediction,
16
  get_labels_and_features_from_dataset,
 
17
  )
18
  from wordings import (
19
  CHECK_CONFIG_OR_SPLIT_RAW,
@@ -159,9 +161,10 @@ def precheck_model_ds_enable_example_btn(
159
  model_id, dataset_id, dataset_config, dataset_split
160
  ):
161
  model_task = check_model_task(model_id)
 
162
  if model_task is None or model_task != "text-classification":
163
  gr.Warning("Please check your model.")
164
- return gr.update(interactive=False), ""
165
 
166
  if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
167
  return (gr.update(), gr.update(), "")
@@ -182,8 +185,6 @@ def precheck_model_ds_enable_example_btn(
182
  return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
183
 
184
 
185
-
186
-
187
  def align_columns_and_show_prediction(
188
  model_id,
189
  dataset_id,
@@ -209,12 +210,31 @@ def align_columns_and_show_prediction(
209
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
210
  ]
211
 
212
- # FIXME: prefiction_output could be None
213
- prediction_input, prediction_output = get_example_prediction(
214
  model_id, dataset_id, dataset_config, dataset_split
215
  )
216
 
217
- model_labels = list(prediction_output.keys())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
  ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
220
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
@@ -255,7 +275,7 @@ def align_columns_and_show_prediction(
255
 
256
  return (
257
  gr.update(value=get_styled_input(prediction_input), visible=True),
258
- gr.update(value=prediction_output, visible=True),
259
  gr.update(visible=True, open=False),
260
  gr.update(interactive=(run_inference and inference_token != "")),
261
  "",
 
12
  from run_jobs import save_job_to_pipe
13
  from text_classification import (
14
  check_model_task,
15
+ preload_hf_inference_api,
16
  get_example_prediction,
17
  get_labels_and_features_from_dataset,
18
+ HuggingFaceInferenceAPIResponse,
19
  )
20
  from wordings import (
21
  CHECK_CONFIG_OR_SPLIT_RAW,
 
161
  model_id, dataset_id, dataset_config, dataset_split
162
  ):
163
  model_task = check_model_task(model_id)
164
+ preload_hf_inference_api(model_id)
165
  if model_task is None or model_task != "text-classification":
166
  gr.Warning("Please check your model.")
167
+ return (gr.update(), gr.update(),"")
168
 
169
  if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
170
  return (gr.update(), gr.update(), "")
 
185
  return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
186
 
187
 
 
 
188
  def align_columns_and_show_prediction(
189
  model_id,
190
  dataset_id,
 
210
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
211
  ]
212
 
213
+ prediction_input, prediction_response = get_example_prediction(
 
214
  model_id, dataset_id, dataset_config, dataset_split
215
  )
216
 
217
+ if prediction_input is None or prediction_response is None:
218
+ return (
219
+ gr.update(visible=False),
220
+ gr.update(visible=False),
221
+ gr.update(visible=False, open=False),
222
+ gr.update(interactive=False),
223
+ "",
224
+ *dropdown_placement,
225
+ )
226
+
227
+ if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
228
+ return (
229
+ gr.update(visible=False),
230
+ gr.update(visible=False),
231
+ gr.update(visible=False, open=False),
232
+ gr.update(interactive=False),
233
+ f"Hugging Face Inference API is loading your model. {prediction_response.message}",
234
+ *dropdown_placement,
235
+ )
236
+
237
+ model_labels = list(prediction_response.keys())
238
 
239
  ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
240
  ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
 
275
 
276
  return (
277
  gr.update(value=get_styled_input(prediction_input), visible=True),
278
+ gr.update(value=prediction_response, visible=True),
279
  gr.update(visible=True, open=False),
280
  gr.update(interactive=(run_inference and inference_token != "")),
281
  "",