Joshua Lochner
committed on
Commit
·
4b426c4
1
Parent(s):
bd6fd75
Update pipeline.py
Browse files- pipeline.py +73 -7
pipeline.py
CHANGED
@@ -8,9 +8,16 @@ from transformers import (
|
|
8 |
TextClassificationPipeline,
|
9 |
)
|
10 |
from typing import Any, Dict, List
|
|
|
|
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
from PIL import Image
|
13 |
-
|
14 |
CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
|
15 |
|
16 |
PROFANITY_RAW = '[ __ ]' # How YouTube transcribes profanity
|
@@ -312,15 +319,74 @@ class PreTrainedPipeline():
|
|
312 |
self.pipeline = SponsorBlockClassificationPipeline(
|
313 |
model=self.model, tokenizer=self.tokenizer)
|
314 |
|
315 |
-
def __call__(self, inputs: str) -> List[Dict[str, Any]]:
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
def __call__(self, inputs: "Image.Image")-> List[Dict[str, Any]]:
|
320 |
-
|
321 |
'video_id': 'pqh4LfPeCYs',
|
322 |
'start': 835.933,
|
323 |
'end': 927.581,
|
324 |
'category': 'sponsor'
|
325 |
}]
|
326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
TextClassificationPipeline,
|
9 |
)
|
10 |
from typing import Any, Dict, List
|
11 |
+
import json
|
12 |
+
from typing import Any, Dict, List
|
13 |
|
14 |
+
import tensorflow as tf
|
15 |
+
from tensorflow import keras
|
16 |
+
import base64
|
17 |
+
import io
|
18 |
+
import os
|
19 |
+
import numpy as np
|
20 |
from PIL import Image
|
|
|
21 |
CATEGORIES = [None, 'SPONSOR', 'SELFPROMO', 'INTERACTION']
|
22 |
|
23 |
PROFANITY_RAW = '[ __ ]' # How YouTube transcribes profanity
|
|
|
319 |
self.pipeline = SponsorBlockClassificationPipeline(
|
320 |
model=self.model, tokenizer=self.tokenizer)
|
321 |
|
322 |
+
def __call__(self, inputs: "Image.Image") -> List[Dict[str, Any]]:
    """Run the segmentation model on a PIL image and return widget-ready masks.

    Args:
        inputs: the input image (a ``PIL.Image.Image``).

    Returns:
        One dict per model output class, each with keys:
          - "label": the class label string (``"LABEL_<cls>"``)
          - "mask":  base64-encoded PNG of that class's binary mask
          - "score": constant 1.0 (per-pixel confidences are not propagated)
          - "q":     output of the text pipeline on a fixed sample payload
    """
    # Fixed sample payload for the text-classification pipeline; its result
    # is attached to every label under the "q" key (kept for backward
    # compatibility with the existing widget plumbing).
    data = [{
        'video_id': 'pqh4LfPeCYs',
        'start': 835.933,
        'end': 927.581,
        'category': 'sponsor'
    }]
    results = self.pipeline(data)

    # Convert the image to an array, resize to the model's expected input
    # size, and normalize to [0, 1] before predicting.
    img = np.array(inputs)
    im = tf.image.resize(img, (128, 128))
    im = tf.cast(im, tf.float32) / 255.0
    pred_mask = self.model.predict(im[tf.newaxis, ...])

    # Best-scoring class per pixel; shape (H, W).
    # (Keras predict returns a NumPy array, so np.argmax applies directly.)
    class_map = np.argmax(pred_mask[0], axis=-1)

    labels = []
    for cls in range(pred_mask.shape[-1]):
        # Vectorized binary mask for this class — replaces the former
        # per-pixel Python loops (O(rows*cols) per class in pure Python).
        # uint8 is required here: int8 would wrap 255 to -1 and is not a
        # valid dtype for an "L"-mode (8-bit grayscale) PIL image.
        mask_arr = (class_map == cls).astype(np.uint8) * 255
        mask_img = Image.fromarray(mask_arr, mode="L")

        # Serialize to a base64-encoded PNG string so the widget can render it.
        with io.BytesIO() as out:
            mask_img.save(out, format="PNG")
            mask_b64 = base64.b64encode(out.getvalue()).decode("utf-8")

        # Widget format: one entry per class with its label and mask string.
        labels.append({
            "label": f"LABEL_{cls}",
            "mask": mask_b64,
            "score": 1.0,
            "q": results,
        })

    return labels