Mohdsalem099 committed on
Commit
bc59155
·
1 Parent(s): bfd3318

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile.txt +12 -0
  2. function.yaml +36 -0
  3. main.py +35 -0
  4. model_handler.py +233 -0
Dockerfile.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM openvino/ubuntu20_dev:2022.3.0 AS build
2
+
3
+ USER root
4
+
5
+ RUN omz_downloader \
6
+ --name text-detection-0004 \
7
+ --precisions FP32 \
8
+ -o /opt/nuclio/open_model_zoo
9
+
10
+ FROM cvat.openvino.base
11
+
12
+ COPY --from=build --chown=root:root /opt/nuclio/open_model_zoo /opt/nuclio/open_model_zoo
function.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ metadata:
2
+ name: openvino-omz-intel-text-detection-0004
3
+ namespace: cvat
4
+ annotations:
5
+ name: Text detection v4
6
+ type: detector
7
+ framework: openvino
8
+ spec: |
9
+ [
10
+ { "id": 1, "name": "text" }
11
+ ]
12
+
13
+ spec:
14
+ description: Text detector based on PixelLink architecture with MobileNetV2-like as a backbone for indoor/outdoor scenes.
15
+ runtime: 'python:3.8'
16
+ handler: main:handler
17
+ eventTimeout: 30s
18
+
19
+ build:
20
+ image: cvat.openvino.omz.intel.text-detection-0004
21
+ baseImage: cvat.openvino.omz.intel.text-detection-0004.base
22
+
23
+ triggers:
24
+ myHttpTrigger:
25
+ maxWorkers: 2
26
+ kind: 'http'
27
+ workerAvailabilityTimeoutMilliseconds: 10000
28
+ attributes:
29
+ maxRequestBodySize: 33554432 # 32MB
30
+
31
+ platform:
32
+ attributes:
33
+ restartPolicy:
34
+ name: always
35
+ maximumRetryCount: 3
36
+ mountMode: volume
main.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import base64
3
+ from PIL import Image
4
+ import io
5
+ from model_handler import ModelHandler
6
+ import yaml
7
+
8
def init_context(context):
    """Nuclio init hook: load the label map and the model into the context.

    Reads ``/opt/nuclio/function.yaml``, parses the JSON label list stored
    under ``metadata.annotations.spec`` into an ``{id: name}`` mapping, and
    stores a ``ModelHandler`` built from it on ``context.user_data.model``.
    """
    context.logger.info("Init context... 0%")

    # The label specification is a JSON string embedded in the YAML config.
    with open("/opt/nuclio/function.yaml", 'rb') as config_stream:
        config = yaml.safe_load(config_stream)

    raw_spec = config['metadata']['annotations']['spec']
    labels = {}
    for entry in json.loads(raw_spec):
        labels[entry['id']] = entry['name']

    # Build the model handler once; handler() reuses it for every request.
    context.user_data.model = ModelHandler(labels)

    context.logger.info("Init context...100%")
22
+
23
def handler(context, event):
    """Nuclio request handler: run text detection on a base64-encoded image.

    ``event.body`` is read as a mapping with a required ``"image"`` entry
    (base64 data) and optional ``"pixel_threshold"`` / ``"link_threshold"``
    entries, each defaulting to 0.8. The model results are returned as a
    JSON response with status 200.
    """
    context.logger.info("Run text-detection-0004 model")
    payload = event.body

    # Decode the image bytes first, then the optional thresholds.
    encoded_image = payload["image"]
    image_stream = io.BytesIO(base64.b64decode(encoded_image))
    pixel_threshold = float(payload.get("pixel_threshold", 0.8))
    link_threshold = float(payload.get("link_threshold", 0.8))
    image = Image.open(image_stream)

    detections = context.user_data.model.infer(
        image, pixel_threshold, link_threshold)

    response_body = json.dumps(detections)
    return context.Response(
        body=response_body,
        headers={},
        content_type='application/json',
        status_code=200,
    )
model_handler.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) 2020-2022 Intel Corporation
2
+ # Copyright (C) 2022 CVAT.ai Corporation
3
+ #
4
+ # SPDX-License-Identifier: MIT
5
+
6
+ import os
7
+ import cv2
8
+ import numpy as np
9
+ from model_loader import ModelLoader
10
+ from shared import to_cvat_mask
11
+
12
+
13
class PixelLinkDecoder():
    """Decode pixel/link logits into oriented text boxes.

    Positive pixels are grouped with a union-find structure: two positive
    pixels end up in the same group when the link from the first to the
    second is positive. Each group is then turned into a minimum-area
    rectangle.
    """

    def __init__(self, pixel_threshold, link_threshold, *, four_neighbours=False):
        """Store thresholds and select the neighbourhood used for linking.

        Args:
            pixel_threshold: minimum softmax score for a pixel to be positive.
            link_threshold: minimum softmax score for a link to be positive.
            four_neighbours: use the 4-connected neighbourhood instead of the
                default 8-connected one.
        """
        if four_neighbours:
            self._get_neighbours = self._get_neighbours_4
        else:
            self._get_neighbours = self._get_neighbours_8
        self.pixel_conf_threshold = pixel_threshold
        self.link_conf_threshold = link_threshold

    def decode(self, height, width, detections: dict):
        """Run the full decoding; results are stored in ``self.bboxes``.

        Args:
            height: height of the original image (boxes are scaled to it).
            width: width of the original image (boxes are scaled to it).
            detections: mapping holding the raw logits under the keys
                ``'model/segm_logits/add'`` and ``'model/link_logits_/add'``.
        """
        self.image_height = height
        self.image_width = width
        self.pixel_scores = self._set_pixel_scores(detections['model/segm_logits/add'])
        self.link_scores = self._set_link_scores(detections['model/link_logits_/add'])

        self.pixel_mask = self.pixel_scores >= self.pixel_conf_threshold
        self.link_mask = self.link_scores >= self.link_conf_threshold
        # (row, col) coordinates of every positive pixel.
        self.points = list(zip(*np.where(self.pixel_mask)))
        self.h, self.w = np.shape(self.pixel_mask)
        # Union-find parent table: -1 marks a root, otherwise the parent point.
        self.group_mask = dict.fromkeys(self.points, -1)
        self.bboxes = None
        self.root_map = None
        self.mask = None

        self._decode()

    def _softmax(self, x, axis=None):
        """Return the softmax of ``x`` along ``axis``, computed via log-sum-exp."""
        return np.exp(x - self._logsumexp(x, axis=axis, keepdims=True))

    # pylint: disable=no-self-use
    def _logsumexp(self, a, axis=None, b=None, keepdims=False, return_sign=False):
        """Compute ``log(sum(b * exp(a)))`` along ``axis``.

        The maximum of ``a`` is subtracted before exponentiating and added
        back afterwards. When ``return_sign`` is true, a ``(result, sign)``
        pair is returned instead of the result alone.
        """
        if b is not None:
            a, b = np.broadcast_arrays(a, b)
            if np.any(b == 0):
                a = a + 0.  # promote to at least float
                a[b == 0] = -np.inf

        a_max = np.amax(a, axis=axis, keepdims=True)

        # Non-finite maxima would poison the subtraction below; use 0 instead.
        if a_max.ndim > 0:
            a_max[~np.isfinite(a_max)] = 0
        elif not np.isfinite(a_max):
            a_max = 0

        if b is not None:
            b = np.asarray(b)
            tmp = b * np.exp(a - a_max)
        else:
            tmp = np.exp(a - a_max)

        # suppress warnings about log of zero
        with np.errstate(divide='ignore'):
            s = np.sum(tmp, axis=axis, keepdims=keepdims)
            if return_sign:
                sgn = np.sign(s)
                s *= sgn  # /= makes more sense but we need zero -> zero
            out = np.log(s)

        if not keepdims:
            a_max = np.squeeze(a_max, axis=axis)
        out += a_max

        if return_sign:
            return out, sgn
        return out

    def _set_pixel_scores(self, pixel_scores):
        """Return softmaxed pixel scores: channel 1 of the first batch item."""
        # Move axis 1 last so the softmax runs over it.
        tmp = np.transpose(pixel_scores, (0, 2, 3, 1))
        return self._softmax(tmp, axis=-1)[0, :, :, 1]

    def _set_link_scores(self, link_scores):
        """Return softmaxed link scores: one score per pixel and neighbour."""
        tmp = np.transpose(link_scores, (0, 2, 3, 1))
        # The last axis is split into 8 neighbours x 2 values each.
        tmp_reshaped = tmp.reshape(tmp.shape[:-1] + (8, 2))
        return self._softmax(tmp_reshaped, axis=-1)[0, :, :, :, 1]

    def _find_root(self, point):
        """Return the union-find root of ``point``, shortening its parent link."""
        root = point
        update_parent = False
        tmp = self.group_mask[root]
        # Compare by value: identity comparison against an int literal relies
        # on interpreter int caching and triggers a SyntaxWarning.
        while tmp != -1:
            root = tmp
            tmp = self.group_mask[root]
            update_parent = True
        if update_parent:
            self.group_mask[point] = root
        return root

    def _join(self, p1, p2):
        """Merge the groups containing ``p1`` and ``p2``."""
        root1 = self._find_root(p1)
        root2 = self._find_root(p2)
        if root1 != root2:
            self.group_mask[root2] = root1

    def _get_index(self, root):
        """Return the 1-based group index of ``root``, allocating it on first use."""
        if root not in self.root_map:
            self.root_map[root] = len(self.root_map) + 1
        return self.root_map[root]

    def _get_all(self):
        """Fill ``self.mask`` with the group index of every positive pixel."""
        self.root_map = {}
        self.mask = np.zeros_like(self.pixel_mask, dtype=np.int32)

        for point in self.points:
            point_root = self._find_root(point)
            bbox_idx = self._get_index(point_root)
            self.mask[point] = bbox_idx

    def _get_neighbours_8(self, x, y):
        """Return in-bounds ``(link_index, nx, ny)`` for the 8 neighbours of (x, y)."""
        w, h = self.w, self.h
        tmp = [(0, x - 1, y - 1), (1, x, y - 1),
               (2, x + 1, y - 1), (3, x - 1, y),
               (4, x + 1, y), (5, x - 1, y + 1),
               (6, x, y + 1), (7, x + 1, y + 1)]

        return [i for i in tmp if 0 <= i[1] < w and 0 <= i[2] < h]

    def _get_neighbours_4(self, x, y):
        """Return in-bounds ``(link_index, nx, ny)`` for the 4 neighbours of (x, y).

        The link indices are the same ones used by the 8-neighbour variant.
        """
        w, h = self.w, self.h
        tmp = [(1, x, y - 1),
               (3, x - 1, y),
               (4, x + 1, y),
               (6, x, y + 1)]

        return [i for i in tmp if 0 <= i[1] < w and 0 <= i[2] < h]

    def _mask_to_bboxes(self, min_area=300, min_height=10):
        """Convert the group-index mask into ordered boxes in ``self.bboxes``.

        Args:
            min_area: boxes with ``w * h`` below this value are dropped.
            min_height: boxes whose shorter side is below this value are dropped.
        """
        self.bboxes = []
        max_bbox_idx = self.mask.max()
        # Nearest-neighbour keeps the integer group indices intact when scaling.
        mask_tmp = cv2.resize(self.mask, (self.image_width, self.image_height), interpolation=cv2.INTER_NEAREST)

        for bbox_idx in range(1, max_bbox_idx + 1):
            bbox_mask = mask_tmp == bbox_idx
            cnts, _ = cv2.findContours(bbox_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
            if len(cnts) == 0:
                continue
            # Only the first contour of each group is used.
            cnt = cnts[0]
            rect, w, h = self._min_area_rect(cnt)
            if min(w, h) < min_height:
                continue
            if w * h < min_area:
                continue
            self.bboxes.append(self._order_points(rect))

    # pylint: disable=no-self-use
    def _min_area_rect(self, cnt):
        """Return ``(corner_points, width, height)`` of the min-area rectangle."""
        rect = cv2.minAreaRect(cnt)
        w, h = rect[1]
        box = cv2.boxPoints(rect)
        # np.int0 was an alias of np.intp and no longer exists in NumPy 2.0.
        box = box.astype(np.intp)
        return box, w, h

    # pylint: disable=no-self-use
    def _order_points(self, rect):
        """Reorder four (x, y) corner points.

        Order: TL, TR, BR, BL
        """
        tmp = np.zeros_like(rect)
        # Smallest / largest x + y give the top-left / bottom-right corners.
        sums = rect.sum(axis=1)
        tmp[0] = rect[np.argmin(sums)]
        tmp[2] = rect[np.argmax(sums)]
        # Smallest / largest y - x give the top-right / bottom-left corners.
        diff = np.diff(rect, axis=1)
        tmp[1] = rect[np.argmin(diff)]
        tmp[3] = rect[np.argmax(diff)]
        return tmp

    def _decode(self):
        """Link neighbouring positive pixels, then label groups and build boxes."""
        for point in self.points:
            y, x = point
            neighbours = self._get_neighbours(x, y)
            for n_idx, nx, ny in neighbours:
                link_value = self.link_mask[y, x, n_idx]
                pixel_cls = self.pixel_mask[ny, nx]
                if link_value and pixel_cls:
                    self._join(point, (ny, nx))

        self._get_all()
        self._mask_to_bboxes()
194
+
195
class ModelHandler:
    """Wrap the text-detection-0004 model and convert its output to result dicts."""

    def __init__(self, labels):
        """Load the model files and remember the label mapping.

        Args:
            labels: mapping from label id to label name.

        The model directory comes from the ``MODEL_PATH`` environment
        variable, falling back to the open_model_zoo location under
        ``/opt/nuclio``.
        """
        base_dir = os.path.abspath(os.environ.get("MODEL_PATH",
            "/opt/nuclio/open_model_zoo/intel/text-detection-0004/FP32"))
        model_xml = os.path.join(base_dir, "text-detection-0004.xml")
        model_bin = os.path.join(base_dir, "text-detection-0004.bin")
        self.model = ModelLoader(model_xml, model_bin)
        self.labels = labels

    def infer(self, image, pixel_threshold, link_threshold):
        """Detect text regions in ``image``.

        Args:
            image: input image; its ``width`` and ``height`` attributes are
                used to scale the outputs.
            pixel_threshold: pixel score threshold passed to the decoder.
            link_threshold: link score threshold passed to the decoder.

        Returns:
            A list with one dict per detected box, holding the keys
            ``confidence``, ``label``, ``points``, ``mask`` and ``type``.
        """
        output_layer = self.model.infer(image)

        obj_class = 1
        pcd = PixelLinkDecoder(pixel_threshold, link_threshold)
        pcd.decode(image.height, image.width, output_layer)

        results = []
        if not pcd.bboxes:
            # Nothing detected: skip the full-image mask resize entirely.
            return results

        # The full-image mask and the label do not depend on the box, so they
        # are built once instead of once per box.
        # NOTE(review): assumes to_cvat_mask does not modify `mask` in place -
        # confirm against shared.to_cvat_mask.
        mask = np.array(pcd.pixel_mask, dtype=np.uint8)
        mask = cv2.resize(mask, dsize=(image.width, image.height), interpolation=cv2.INTER_CUBIC)
        cv2.normalize(mask, mask, 0, 255, cv2.NORM_MINMAX)
        label = self.labels.get(obj_class, "unknown")

        for box in pcd.bboxes:
            # Flattened corner list: x0, y0, x1, y1, ...
            box = box.ravel().tolist()
            xs = box[::2]
            ys = box[1::2]
            cvat_mask = to_cvat_mask((min(xs), min(ys), max(xs), max(ys)), mask)

            results.append({
                "confidence": None,
                "label": label,
                "points": box,
                "mask": cvat_mask,
                "type": "mask",
            })

        return results