kevinconka committed
Commit 12aa779 · 1 Parent(s): 1b40095

backwards compatibility with 'add_batch' approach but with deprecation warning

Files changed (2)
  1. README.md +101 -264
  2. det-metrics.py +46 -22
README.md CHANGED
@@ -1,11 +1,10 @@
  ---
- title: Detection Metrics
  tags:
  - evaluate
  - metric
  description: >-
- Compute multiple object detection metrics at different bounding box area
- levels.
  sdk: gradio
  sdk_version: 3.19.1
  app_file: app.py
@@ -13,12 +12,75 @@ pinned: false
  emoji: 🕵️
  ---

- # Detection Metrics

- ## Description
- This metric can be used to calculate object detection metrics. It has an option to calculate the metrics at different levels of bounding box sizes, so that more insight is provided into the performance for different objects. It is adapted from the base of pycocotools metrics.

- ## How to Use
  ```python
  import evaluate
  import logging
@@ -26,6 +88,7 @@ from seametrics.payload import PayloadProcessor

  logging.basicConfig(level=logging.WARNING)

  processor = PayloadProcessor(
      dataset_name="SAILING_DATASET_QA",
      gt_field="ground_truth_det",
@@ -34,9 +97,12 @@ processor = PayloadProcessor(
      data_type="rgb",
  )

  module = evaluate.load("SEA-AI/det-metrics")
- module.add_from_payload(processor.payload)
- module.compute()
  ```

  ```console
@@ -55,205 +121,16 @@ module.compute()
   'nImgs': 22}}
  ```

- ### Metric Settings
- When loading the module with `module = evaluate.load("SEA-AI/det-metrics", **params)`, multiple parameters can be specified.
- **area_ranges_tuples** *List[Tuple[str, List[int]]]*: Different levels of area ranges at which metrics should be calculated. It is a list of tuples, where the first element of each tuple specifies the name of the area range and the second element is a list specifying the lower and upper limit of the area range. Defaults to `[("all", [0, 1e5**2])]`.
- **bbox_format** *Literal["xyxy", "xywh", "cxcywh"]*: Bounding box format of predictions and ground truth. Defaults to `"xywh"`.
- **iou_threshold** *Optional[float]*: IOU threshold at which the metrics should be calculated. The IOU threshold defines the minimal overlap between a ground truth and a predicted bounding box for a prediction to count as correct. Defaults to `1e-10`.
- **class_agnostic** *bool*. Defaults to `True`. Non-class-agnostic metrics are currently not supported.
-
- ### Input Values
- Add predictions and ground truths to the metric with `module.add_batches(payload)`.
- The payload format should be as returned by the function `fo_to_payload()` defined in the seametrics library.
- An example of what a payload might look like:
-
- ```
- {
-     'dataset': 'SAILING_DATASET_QA',
-     'models': ['yolov5n6_RGB_D2304-v1_9C'],
-     'gt_field_name': 'ground_truth_det',
-     'sequences': {
-         # sequence 1: 1 frame with 1 pred and 1 gt
-         'Trip_14_Seq_1': {
-             'resolution': (720, 1280),
-             'yolov5n6_RGB_D2304-v1_9C': [
-                 [fo.Detection(
-                     label='FAR_AWAY_OBJECT',
-                     bounding_box=[0.35107421875, 0.274658203125, 0.0048828125, 0.009765625],  # tp nr 1
-                     confidence=0.153076171875
-                 )]
-             ],
-             'ground_truth_det': [
-                 [fo.Detection(
-                     label='FAR_AWAY_OBJECT',
-                     bounding_box=[0.35107421875, 0.274658203125, 0.0048828125, 0.009765625]
-                 )]
-             ]
-         },
-         # sequence 2: frame 1: 2 pred, 1 gt; frame 2: 1 pred, 1 gt
-         'Trip_14_Seq_2': {
-             'resolution': (720, 1280),
-             'yolov5n6_RGB_D2304-v1_9C': [
-                 [
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.389404296875, 0.306640625, 0.005126953125, 0.0146484375],  # tp nr 2
-                         confidence=0.153076171875
-                     ),
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.50390625, 0.357666015625, 0.0048828125, 0.00976562],  # fp nr 1
-                         confidence=0.153076171875
-                     ),
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.455078125, 0.31494140625, 0.00390625, 0.0087890625],  # fp nr 2
-                         confidence=0.153076171875
-                     )
-                 ],
-                 [
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.455078125, 0.31494140625, 0.00390625, 0.0087890625],  # tp nr 3
-                         confidence=0.153076171875
-                     )
-                 ],
-                 [
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.455078125, 0.31494140625, 0.00390625, 0.0087890625],  # fp nr 3
-                         confidence=0.153076171875
-                     )
-                 ]
-             ],
-             'ground_truth_det': [
-                 # frame nr 1
-                 [
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.389404296875, 0.306640625, 0.005126953125, 0.0146484375],
-                     )
-                 ],
-                 # frame nr 2
-                 [
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.455078125, 0.31494140625, 0.00390625, 0.0087890625],
-                         confidence=0.153076171875
-                     ),
-                     fo.Detection(
-                         label='FAR_AWAY_OBJECT',
-                         bounding_box=[0.35107421875, 0.274658203125, 0.0048828125, 0.009765625],  # missed nr 1
-                         confidence=0.153076171875
-                     )
-                 ],
-                 # frame nr 3
-                 []
-             ]
-         }
-     },
-     'sequence_list': ['Trip_14_Seq_1', 'Trip_14_Seq_2']
- }
- ```

- Optionally, you can pass the model that should be evaluated as a string via `model=model_str`. By default, the first model is evaluated, i.e. `model = payload["models"][0]`.
-
- ### Output Values
- The metric outputs a dictionary that contains a sub-dictionary for each of the specified area range names.
- Each sub-dictionary holds performance metrics at the specific area range level:
- **range**: corresponding area range
- **iouThr**: IOU threshold used in calculating the metric
- **maxDets**: maximum number of detections in calculating the metrics
- **tp**: number of true positive predictions
- **fp**: number of false positive predictions
- **fn**: number of false negative predictions
- **duplicates**: number of duplicated bounding box predictions
- **precision**: ratio between true positive predictions and positive predictions (tp/(tp+fp))
- **recall**: ratio between true positive predictions and actual ground truths (tp/(tp+fn))
- **f1**: trades off precision and recall (2*(precision*recall)/(precision+recall))
- **support**: number of ground truth bounding boxes that are considered in the metric
- **fpi**: number of images with predictions but no ground truths
- **nImgs**: number of total images considered in calculating the metric
-
- ### Examples
- We can specify different area range levels at which we would like to compute the metrics.
- ```python
- import evaluate
- import logging
- from seametrics.payload import PayloadProcessor

- logging.basicConfig(level=logging.WARNING)
-
- processor = PayloadProcessor(
-     dataset_name="SAILING_DATASET_QA",
-     gt_field="ground_truth_det",
-     models=["yolov5n6_RGB_D2304-v1_9C"],
-     sequence_list=["Trip_14_Seq_1"],
-     data_type="rgb",
- )

  area_ranges_tuples = [
      ("all", [0, 1e5**2]),
      ("small", [0**2, 6**2]),
@@ -266,68 +143,28 @@ module = evaluate.load(
      iou_thresholds=[0.00001],
      area_ranges_tuples=area_ranges_tuples,
  )
- module.add_from_payload(processor.payload)
- module.compute()
  ```

- ```
- {'all': {'range': [0, 10000000000.0],
-          'iouThr': '0.00',
-          'maxDets': 100,
-          'tp': 89,
-          'fp': 13,
-          'fn': 15,
-          'duplicates': 1,
-          'precision': 0.8725490196078431,
-          'recall': 0.8557692307692307,
-          'f1': 0.8640776699029126,
-          'support': 104,
-          'fpi': 0,
-          'nImgs': 22},
-  'small': {'range': [0, 36],
-            'iouThr': '0.00',
-            'maxDets': 100,
-            'tp': 12,
-            'fp': 3,
-            'fn': 8,
-            'duplicates': 0,
-            'precision': 0.8,
-            'recall': 0.6,
-            'f1': 0.6857142857142857,
-            'support': 20,
-            'fpi': 0,
-            'nImgs': 22},
-  'medium': {'range': [36, 144],
-             'iouThr': '0.00',
-             'maxDets': 100,
-             'tp': 50,
-             'fp': 10,
-             'fn': 7,
-             'duplicates': 1,
-             'precision': 0.8333333333333334,
-             'recall': 0.8771929824561403,
-             'f1': 0.8547008547008548,
-             'support': 57,
-             'fpi': 0,
-             'nImgs': 22},
-  'large': {'range': [144, 10000000000.0],
-            'iouThr': '0.00',
-            'maxDets': 100,
-            'tp': 27,
-            'fp': 0,
-            'fn': 0,
-            'duplicates': 0,
-            'precision': 1.0,
-            'recall': 1.0,
-            'f1': 1.0,
-            'support': 27,
-            'fpi': 0,
-            'nImgs': 22}}
- ```

  ## Further References
- *seametrics* library: https://github.com/SEA-AI/seametrics/tree/main
- Calculating metrics is based on pycoco tools: https://github.com/cocodataset/cocoapi/tree/master/PythonAPI/pycocotools
- Further info about metrics: https://www.analyticsvidhya.com/blog/2020/09/precision-recall-machine-learning/

  ---
+ title: det-metrics
  tags:
  - evaluate
  - metric
  description: >-
+   Modified cocoeval.py wrapped into torchmetrics' mAP metric, with a numpy dependency instead of torch.
  sdk: gradio
  sdk_version: 3.19.1
  app_file: app.py
  pinned: false
  emoji: 🕵️
  ---

+ # SEA-AI/det-metrics

+ This Hugging Face metric uses `seametrics.detection.PrecisionRecallF1Support` under the hood to compute COCO-like metrics for object detection tasks. It is a [modified cocoeval.py](https://github.com/SEA-AI/seametrics/blob/develop/seametrics/detection/cocoeval.py) wrapped inside [torchmetrics' mAP metric](https://lightning.ai/docs/torchmetrics/stable/detection/mean_average_precision.html), but with numpy arrays instead of torch tensors.
+
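If you want to skip the `evaluate` wrapper, the underlying class can also be driven directly. A minimal sketch, based on the constructor arguments and `update`/`compute` calls visible in the `det-metrics.py` diff below; treat the exact signature and defaults as assumptions:

```python
import numpy as np
from seametrics.detection import PrecisionRecallF1Support

# mirror the wrapper's default construction: one catch-all area range, near-zero IOU
metric = PrecisionRecallF1Support(
    iou_thresholds=[1e-10],
    area_ranges=[[0, 1e5**2]],
    area_ranges_labels=["all"],
    class_agnostic=True,
    box_format="xywh",
)

# per-image dicts of numpy arrays, same schema as the evaluate example below
preds = [{
    "boxes": np.array([[449.3, 197.75390625, 6.25, 7.03125]]),
    "labels": np.array([0]),
    "scores": np.array([0.153076171875]),
}]
refs = [{
    "boxes": np.array([[449.3, 197.75390625, 6.25, 7.03125]]),
    "labels": np.array([0]),
    "area": np.array([132.2]),
}]

metric.update(preds, refs)
print(metric.compute()["metrics"])  # dict keyed by area-range label, e.g. "all"
```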
+ ## Getting Started
+
+ To get started with det-metrics, make sure you have the necessary dependencies installed. This metric relies on the `evaluate` and `seametrics` libraries for metric calculation and integration with FiftyOne datasets.
+
+ ### Installation
+
+ First, ensure you have Python 3.8 or later installed. Then, install det-metrics using pip:
+
+ ```sh
+ pip install evaluate git+https://github.com/SEA-AI/seametrics@develop
+ ```
+
+ ### Basic Usage
+
+ Here's how to quickly evaluate your object detection models using SEA-AI/det-metrics:
+
+ ```python
+ import evaluate
+
+ # Define your predictions and references (dict values can also be numpy arrays)
+ predictions = [
+     {
+         "boxes": [[449.3, 197.75390625, 6.25, 7.03125], [334.3, 181.58203125, 11.5625, 6.85546875]],
+         "labels": [0, 0],
+         "scores": [0.153076171875, 0.72314453125],
+     }
+ ]
+
+ references = [
+     {
+         "boxes": [[449.3, 197.75390625, 6.25, 7.03125], [334.3, 181.58203125, 11.5625, 6.85546875]],
+         "labels": [0, 0],
+         "area": [132.2, 83.8],
+     }
+ ]
+
+ # Load SEA-AI/det-metrics and evaluate
+ module = evaluate.load("SEA-AI/det-metrics")
+ module.add(prediction=predictions, reference=references)
+ results = module.compute()
+
+ print(results)
+ ```
+
+ This will output the evaluation metrics for your detection model:
+ ```
+ {'all': {'range': [0, 10000000000.0],
+          'iouThr': '0.00',
+          'maxDets': 100,
+          'tp': 2,
+          'fp': 0,
+          'fn': 0,
+          'duplicates': 0,
+          'precision': 1.0,
+          'recall': 1.0,
+          'f1': 1.0,
+          'support': 2,
+          'fpi': 0,
+          'nImgs': 1}}
+ ```
+
+ ## FiftyOne Integration
+
+ Integrate SEA-AI/det-metrics with FiftyOne datasets for enhanced analysis and visualization:

  ```python
  import evaluate
  import logging
  from seametrics.payload import PayloadProcessor

  logging.basicConfig(level=logging.WARNING)

+ # Configure your dataset and model details
  processor = PayloadProcessor(
      dataset_name="SAILING_DATASET_QA",
      gt_field="ground_truth_det",
      models=["yolov5n6_RGB_D2304-v1_9C"],
      sequence_list=["Trip_14_Seq_1"],
      data_type="rgb",
  )

+ # Evaluate using SEA-AI/det-metrics
  module = evaluate.load("SEA-AI/det-metrics")
+ module.add_payload(processor.payload)
+ results = module.compute()
+
+ print(results)
  ```

  ```console
  ...
  'nImgs': 22}}
  ```
124
+ ## Metric Settings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
+ Customize your evaluation by specifying various parameters when loading SEA-AI/det-metrics:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
+ - **area_ranges_tuples**: Define different area ranges for metrics calculation.
129
+ - **bbox_format**: Set the bounding box format (e.g., `"xywh"`).
130
+ - **iou_threshold**: Choose the IOU threshold for determining correct detections.
131
+ - **class_agnostic**: Specify whether to calculate metrics disregarding class labels.
 
 
 
 
 
132
 
133
+ ```python
  area_ranges_tuples = [
      ("all", [0, 1e5**2]),
      ("small", [0**2, 6**2]),
      ("medium", [6**2, 12**2]),
      ("large", [12**2, 1e5**2]),
  ]

  module = evaluate.load(
      "SEA-AI/det-metrics",
      iou_thresholds=[0.00001],
      area_ranges_tuples=area_ranges_tuples,
  )
  ```
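For completeness, the remaining two settings can be passed in the same call. A sketch, assuming the keyword names shown above and the documented defaults for `bbox_format` (`"xywh"`) and `class_agnostic` (`True`):

```python
module = evaluate.load(
    "SEA-AI/det-metrics",
    area_ranges_tuples=area_ranges_tuples,  # named area ranges defined above
    bbox_format="xywh",                     # boxes given as [x, y, width, height]
    iou_thresholds=[0.00001],               # near-zero IOU: any overlap counts as a match
    class_agnostic=True,                    # match boxes regardless of class label
)
```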

+ ## Output Values
+
+ SEA-AI/det-metrics provides a detailed breakdown of performance metrics for each specified area range:
+
+ - **range**: The area range considered.
+ - **iouThr**: The IOU threshold applied.
+ - **maxDets**: The maximum number of detections evaluated.
+ - **tp/fp/fn**: Counts of true positives, false positives, and false negatives.
+ - **duplicates**: Number of duplicate detections.
+ - **precision/recall/f1**: Calculated precision, recall, and F1 score.
+ - **support**: Number of ground truth boxes considered.
+ - **fpi**: Number of images with predictions but no ground truths.
+ - **nImgs**: Total number of images evaluated.
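Since `compute()` returns a plain dictionary keyed by area-range name, individual values can be read out directly. For instance, to print a per-range summary (field names as listed above):

```python
results = module.compute()

# one sub-dictionary per configured area range, e.g. "all", "small", ...
for range_name, m in results.items():
    print(
        f"{range_name}: precision={m['precision']:.3f}, "
        f"recall={m['recall']:.3f}, f1={m['f1']:.3f}, support={m['support']}"
    )
```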
 
  ## Further References

+ - **seametrics Library**: Explore the [seametrics GitHub repository](https://github.com/SEA-AI/seametrics/tree/main) for more details on the underlying library.
+ - **Pycoco Tools**: SEA-AI/det-metrics calculations are based on [pycoco tools](https://github.com/cocodataset/cocoapi/tree/master/PythonAPI/pycocotools), a widely used library for COCO dataset evaluation.
+ - **Understanding Metrics**: For a deeper understanding of precision, recall, and other metrics, read [this comprehensive guide](https://www.analyticsvidhya.com/blog/2020/09/precision-recall-machine-learning/).
+
+ ## Contribution
+
+ Your contributions are welcome! If you'd like to improve SEA-AI/det-metrics or add new features, please feel free to fork the repository, make your changes, and submit a pull request.
det-metrics.py CHANGED
@@ -14,14 +14,13 @@
  """TODO: Add a description here."""

  from typing import List, Tuple, Literal

  import evaluate
  import datasets
  import numpy as np

  from seametrics.detection import PrecisionRecallF1Support
- from seametrics.detection.utils import payload_to_det_metric
- from seametrics.payload import Payload

  _CITATION = """\
  @InProceedings{coco:2020,
@@ -125,16 +124,12 @@ class DetectionMetric(evaluate.Metric):
          **kwargs
      ):
          super().__init__(**kwargs)
-         area_ranges = [v for _, v in area_ranges_tuples]
-         area_ranges_labels = [k for k, _ in area_ranges_tuples]
-         iou_threshold = (
-             [iou_threshold] if not isinstance(iou_threshold, list) else iou_threshold
-         )
-
          self.coco_metric = PrecisionRecallF1Support(
-             iou_thresholds=iou_threshold,
-             area_ranges=area_ranges,
-             area_ranges_labels=area_ranges_labels,
              class_agnostic=class_agnostic,
              iou_type=iou_type,
              box_format=bbox_format,
@@ -183,15 +178,45 @@ class DetectionMetric(evaluate.Metric):

      def add(self, *, prediction, reference, **kwargs):
          """Adds a batch of predictions and references to the metric"""
          self.coco_metric.update(prediction, reference)

          # does not impact the metric, but is required for the interface x_x
          super(evaluate.Metric, self).add(
-             prediction=[self._np_to_lists(p) for p in prediction],
-             references=[self._np_to_lists(r) for r in reference],
              **kwargs
          )

      def _np_to_lists(self, d):
          """datasets does not support numpy arrays for type checking"""
          for k, v in d.items():
@@ -201,12 +226,11 @@ class DetectionMetric(evaluate.Metric):
                  d[k] = v.tolist()
          return d

-     def _compute(self, *, predictions, references, **kwargs):
-         """Returns the scores"""
-         return self.coco_metric.compute()["metrics"]
-
-     def add_from_payload(self, payload: Payload, model_name: str = None):
-         """Converts the payload to the format expected by the metric"""
-         predictions, references = payload_to_det_metric(payload, model_name)
-         self.add(prediction=predictions, reference=references)
-         return self
  """TODO: Add a description here."""

  from typing import List, Tuple, Literal
+ from deprecated import deprecated

  import evaluate
  import datasets
  import numpy as np

  from seametrics.detection import PrecisionRecallF1Support

  _CITATION = """\
  @InProceedings{coco:2020,

          **kwargs
      ):
          super().__init__(**kwargs)
          self.coco_metric = PrecisionRecallF1Support(
+             iou_thresholds=(
+                 iou_threshold if isinstance(iou_threshold, list) else [iou_threshold]
+             ),
+             area_ranges=[v for _, v in area_ranges_tuples],
+             area_ranges_labels=[k for k, _ in area_ranges_tuples],
              class_agnostic=class_agnostic,
              iou_type=iou_type,
              box_format=bbox_format,

      def add(self, *, prediction, reference, **kwargs):
          """Adds a batch of predictions and references to the metric"""
+         # in case the inputs are lists, convert them to numpy arrays
+         prediction = self._preprocess(prediction)
+         reference = self._preprocess(reference)
+
          self.coco_metric.update(prediction, reference)

          # does not impact the metric, but is required for the interface x_x
          super(evaluate.Metric, self).add(
+             prediction=self._postprocess(prediction),
+             references=self._postprocess(reference),
              **kwargs
          )

+     @deprecated(reason="Use `module.add_payload` instead")
+     def add_batch(self, payload, model_name: str = None):
+         """Takes as input a payload and adds the batch to the metric"""
+         self.add_payload(payload, model_name)
+
+     def _compute(self, *, predictions, references, **kwargs):
+         """Called within the evaluate.Metric.compute() method"""
+         return self.coco_metric.compute()["metrics"]
+
+     def add_payload(self, payload, model_name: str = None):
+         """Converts the payload to the format expected by the metric"""
+         # import only if needed since fiftyone is not a direct dependency
+         from seametrics.detection.utils import payload_to_det_metric
+
+         predictions, references = payload_to_det_metric(payload, model_name)
+         self.add(prediction=predictions, reference=references)
+         return self
+
+     def _preprocess(self, list_of_dicts):
+         """Converts lists to numpy arrays for the underlying metric"""
+         return [self._lists_to_np(d) for d in list_of_dicts]
+
+     def _postprocess(self, list_of_dicts):
+         """Converts the numpy arrays to lists for type checking"""
+         return [self._np_to_lists(d) for d in list_of_dicts]
+
      def _np_to_lists(self, d):
          """datasets does not support numpy arrays for type checking"""
          for k, v in d.items():
              if isinstance(v, dict):
                  self._np_to_lists(v)
              elif isinstance(v, np.ndarray):
                  d[k] = v.tolist()
          return d

+     def _lists_to_np(self, d):
+         """the underlying metric expects numpy arrays, not lists"""
+         for k, v in d.items():
+             if isinstance(v, dict):
+                 self._lists_to_np(v)
+             elif isinstance(v, list):
+                 d[k] = np.array(v)
+         return d
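
With this change, existing callers of `add_batch` keep working but are nudged towards `add_payload`. A quick sketch of the migration, assuming a `payload` built via `PayloadProcessor` as in the README above (the `deprecated` decorator emits a `DeprecationWarning` when the old method is called):

```python
import warnings

import evaluate

module = evaluate.load("SEA-AI/det-metrics")

# old entry point: still works, but warns
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    module.add_batch(payload)  # payload = PayloadProcessor(...).payload
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# new entry point: same behaviour, no warning
module = evaluate.load("SEA-AI/det-metrics")  # fresh instance to avoid double-adding
module.add_payload(payload)
results = module.compute()
```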