Joshua Lochner committed
Commit · d17215e · 1 Parent(s): 46722fe
Update pipeline.py

Files changed: pipeline.py (+1, -64)
pipeline.py CHANGED

@@ -297,9 +297,7 @@ class PreTrainedPipeline():
                                     model=self.model2, tokenizer=self.tokenizer2)
 
     def __call__(self, inputs: str)-> List[Dict[str, Any]]:
-        url = 'https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat-1.jpg'
 
-        inputs2 = Image.open(requests.get(url, stream=True).raw)
         # TEMP testing
         # data = [{"video_id": "pqh4LfPeCYs", "start": 835.933, "end": 927.581, "category": "sponsor"}]
         # words = get_words("pqh4LfPeCYs")
@@ -310,70 +308,9 @@ class PreTrainedPipeline():
             'end': 927.581,
             'category': 'sponsor'
         }]
-        segment = data #self.pipeline2(data)
         # END TEMP
 
-
-        # convert img to numpy array, resize and normalize to make the prediction
-        img = np.array(inputs2)
-
-        im = tf.image.resize(img, (128, 128))
-        im = tf.cast(im, tf.float32) / 255.0
-        pred_mask = self.model.predict(im[tf.newaxis, ...])
-
-        # take the best performing class for each pixel
-        # the output of argmax looks like this [[1, 2, 0], ...]
-        pred_mask_arg = tf.argmax(pred_mask, axis=-1)
-
-        labels = []
-
-        # convert the prediction mask into binary masks for each class
-        binary_masks = {}
-        mask_codes = {}
-
-        # when we take tf.argmax() over pred_mask, it becomes a tensor object
-        # the shape becomes TensorShape object, looking like this TensorShape([128])
-        # we need to take get shape, convert to list and take the best one
-
-        rows = pred_mask_arg[0][1].get_shape().as_list()[0]
-        cols = pred_mask_arg[0][2].get_shape().as_list()[0]
-
-        for cls in range(pred_mask.shape[-1]):
-
-            binary_masks[f"mask_{cls}"] = np.zeros(shape = (pred_mask.shape[1], pred_mask.shape[2])) #create masks for each class
-
-            for row in range(rows):
-
-                for col in range(cols):
-
-                    if pred_mask_arg[0][row][col] == cls:
-
-                        binary_masks[f"mask_{cls}"][row][col] = 1
-                    else:
-                        binary_masks[f"mask_{cls}"][row][col] = 0
-
-            mask = binary_masks[f"mask_{cls}"]
-            mask *= 255
-            img = Image.fromarray(mask.astype(np.int8), mode="L")
-
-            # we need to make it readable for the widget
-            with io.BytesIO() as out:
-                img.save(out, format="PNG")
-                png_string = out.getvalue()
-                mask = base64.b64encode(png_string).decode("utf-8")
-
-            mask_codes[f"mask_{cls}"] = mask
-
-
-            # widget needs the below format, for each class we return label and mask string
-            labels.append({
-                'inputs': inputs,
-                "label": f"LABEL_{cls}",
-                "mask": mask_codes[f"mask_{cls}"],
-                "score": 1.0,
-                "words": segment
-            })
-        return labels
+        return self.pipeline2(data)
 
 
 
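For context, here is a minimal runnable sketch of what `__call__` does after this commit, pieced together from the diff above. `PreTrainedPipelineSketch` is a hypothetical stand-in for the real class: its `__init__` normally builds `self.pipeline2` from `self.model2` and `self.tokenizer2`, which is omitted here, and the `video_id`/`start` values are assumed from the commented-out TEMP example since they are not visible in the hunk shown.

from typing import Any, Dict, List

# Hypothetical stand-in for PreTrainedPipeline in pipeline.py (sketch only).
# The real class builds self.pipeline2 from self.model2 / self.tokenizer2.
class PreTrainedPipelineSketch:
    def __init__(self, pipeline2):
        self.pipeline2 = pipeline2

    def __call__(self, inputs: str) -> List[Dict[str, Any]]:
        # TEMP testing: hard-coded segment, as in the commit
        data = [{
            'video_id': 'pqh4LfPeCYs',  # assumed from the commented-out TEMP example
            'start': 835.933,           # assumed from the commented-out TEMP example
            'end': 927.581,
            'category': 'sponsor'
        }]
        # END TEMP
        return self.pipeline2(data)

# Usage sketch: any callable that accepts the segment list works as a stand-in.
if __name__ == "__main__":
    pipe = PreTrainedPipelineSketch(pipeline2=lambda segments: segments)
    print(pipe("unused input"))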