Spaces:

malvika2003
/

openvino_notebooks

Runtime error

App Files Files Community

openvino_notebooks / notebooks /paddle-ocr-webcam /pre_post_processing.py

malvika2003

Upload folder using huggingface_hub

db5855f verified about 1 year ago

raw

history blame contribute delete

15.9 kB

	import sys
	import cv2
	import numpy as np
	import paddle
	import math

	from PIL import Image, ImageDraw, ImageFont
	import copy
	import imghdr
	from shapely.geometry import Polygon
	import pyclipper
	import string
	from paddle.nn import functional as F


	def DetResizeForTest(data):
	    """Resize ``data["image"]`` so that both sides are multiples of 32.

	    The longer side is first limited to 960 pixels (aspect ratio preserved),
	    then each side is rounded to the nearest multiple of 32 with a floor of 32.

	    Args:
	        data: dict holding an image array of shape [h, w, c] under "image".

	    Returns:
	        The same dict, updated in place: "image" is the resized array and
	        "shape" is ``np.array([src_h, src_w, ratio_h, ratio_w])``.

	    Raises:
	        ValueError: if OpenCV rejects the resize (for example an empty image).
	    """
	    limit_side_len = 960
	    img = data["image"]
	    src_h, src_w, _ = img.shape

	    # Shrink only when the longer side exceeds the limit; never upscale here.
	    longest_side = max(src_h, src_w)
	    ratio = float(limit_side_len) / longest_side if longest_side > limit_side_len else 1.0

	    # Snap to the nearest multiple of 32; the floor of 32 keeps both sides positive.
	    resize_h = max(int(round(int(src_h * ratio) / 32) * 32), 32)
	    resize_w = max(int(round(int(src_w * ratio) / 32) * 32), 32)

	    try:
	        img = cv2.resize(img, (resize_w, resize_h))
	    except cv2.error as err:
	        # Surface the failure to the caller instead of ending the whole
	        # process with a "success" exit status.
	        raise ValueError("cannot resize image of shape {} to ({}, {})".format(img.shape, resize_w, resize_h)) from err

	    ratio_h = resize_h / float(src_h)
	    ratio_w = resize_w / float(src_w)

	    data["image"] = img
	    data["shape"] = np.array([src_h, src_w, ratio_h, ratio_w])
	    return data


	def NormalizeImage(data, scale=1.0 / 255.0, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
	    """Normalize ``data["image"]``: multiply by ``scale``, subtract ``mean``, divide by ``std``.

	    Args:
	        data: dict holding the image under "image", either a numpy array or a
	            PIL image (PIL images are converted to arrays first).
	        scale: factor applied to the pixel values before mean/std normalization.
	        mean: three per-channel means, broadcast over a (1, 1, 3) shape.
	        std: three per-channel standard deviations, broadcast the same way.

	    Returns:
	        The same dict, with "image" replaced by the normalized float32 array.
	    """
	    channel_shape = (1, 1, 3)
	    mean = np.array(mean).reshape(channel_shape).astype("float32")
	    std = np.array(std).reshape(channel_shape).astype("float32")

	    img = data["image"]
	    # PIL is consulted only for non-array inputs; the module-level ``Image``
	    # import is reused instead of re-importing it on every call.
	    if not isinstance(img, np.ndarray) and isinstance(img, Image.Image):
	        img = np.array(img)

	    assert isinstance(img, np.ndarray), "invalid input 'img' in NormalizeImage"
	    data["image"] = (img.astype("float32") * scale - mean) / std
	    return data


	def unclip(box, unclip_ratio=2.0):
	    """Offset the polygon ``box`` with pyclipper and return the result as an array.

	    The offset distance is ``area * unclip_ratio / perimeter`` of the polygon.

	    Args:
	        box: sequence of (x, y) polygon vertices.
	        unclip_ratio: multiplier for the offset distance (default 2.0, the
	            value that used to be hard-coded).

	    Returns:
	        ``np.array`` of the path(s) produced by ``PyclipperOffset.Execute``.
	    """
	    poly = Polygon(box)
	    distance = poly.area * unclip_ratio / poly.length
	    offset = pyclipper.PyclipperOffset()
	    offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
	    return np.array(offset.Execute(distance))


	def get_mini_boxes(contour):
	    """Return the four corners of the contour's minimum-area rectangle and its short side.

	    The corners are sorted by x, then each x-pair is ordered by y, giving
	    [left pair / smaller y, right pair / smaller y,
	     right pair / larger y, left pair / larger y].

	    Returns:
	        (box, short_side): list of four corner points and ``min(width, height)``
	        of the rectangle reported by ``cv2.minAreaRect``.
	    """
	    rect = cv2.minAreaRect(contour)
	    left_a, left_b, right_a, right_b = sorted(cv2.boxPoints(rect), key=lambda pt: pt[0])

	    # Within the two left-most corners, put the one with the smaller y first.
	    if left_b[1] > left_a[1]:
	        left_low_y, left_high_y = left_a, left_b
	    else:
	        left_low_y, left_high_y = left_b, left_a

	    # Same ordering for the two right-most corners.
	    if right_b[1] > right_a[1]:
	        right_low_y, right_high_y = right_a, right_b
	    else:
	        right_low_y, right_high_y = right_b, right_a

	    corners = [left_low_y, right_low_y, right_high_y, left_high_y]
	    return corners, min(rect[1])


	def box_score_fast(bitmap, _box):
	    """Score a box as the mean of ``bitmap`` over the box polygon.

	    Only the polygon's axis-aligned bounding window (clipped to the bitmap)
	    is examined; a mask filled from the polygon selects the pixels averaged.
	    ``_box`` is copied, so the caller's array is left untouched.
	    """
	    rows, cols = bitmap.shape[:2]
	    poly = _box.copy()

	    # Bounding window of the polygon, clipped to valid bitmap indices.
	    x_lo = np.clip(np.floor(poly[:, 0].min()).astype(np.int32), 0, cols - 1)
	    x_hi = np.clip(np.ceil(poly[:, 0].max()).astype(np.int32), 0, cols - 1)
	    y_lo = np.clip(np.floor(poly[:, 1].min()).astype(np.int32), 0, rows - 1)
	    y_hi = np.clip(np.ceil(poly[:, 1].max()).astype(np.int32), 0, rows - 1)

	    window_mask = np.zeros((y_hi - y_lo + 1, x_hi - x_lo + 1), dtype=np.uint8)

	    # Shift the polygon into window coordinates before rasterizing it.
	    poly[:, 0] = poly[:, 0] - x_lo
	    poly[:, 1] = poly[:, 1] - y_lo
	    cv2.fillPoly(window_mask, poly.reshape(1, -1, 2).astype(np.int32), 1)

	    window = bitmap[y_lo : y_hi + 1, x_lo : x_hi + 1]
	    return cv2.mean(window, window_mask)[0]


	def boxes_from_bitmap(pred, _bitmap, dest_width, dest_height, box_thresh=0.7, max_candidates=1000, min_size=3):
	    """Extract scored text boxes from a binarized segmentation map.

	    Args:
	        pred: score map passed to ``box_score_fast`` to score each candidate.
	        _bitmap: 2-D map of shape (H, W) whose values are binarized as {0, 1}.
	        dest_width: width the box x-coordinates are rescaled to.
	        dest_height: height the box y-coordinates are rescaled to.
	        box_thresh: candidates scoring below this are dropped (default 0.7).
	        max_candidates: at most this many contours are examined (default 1000).
	        min_size: minimum short side of a candidate; the expanded box must
	            reach ``min_size + 2`` (default 3).

	    Returns:
	        (boxes, scores): int16 array of kept boxes (four points each) and the
	        list of their scores.
	    """
	    height, width = _bitmap.shape

	    outs = cv2.findContours((_bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
	    # findContours returns (image, contours, hierarchy) or (contours, hierarchy)
	    # depending on the OpenCV version; contours is second from the end in both.
	    contours = outs[-2]

	    boxes = []
	    scores = []
	    for contour in contours[:max_candidates]:
	        points, sside = get_mini_boxes(contour)
	        if sside < min_size:
	            continue
	        points = np.array(points)

	        score = box_score_fast(pred, points.reshape(-1, 2))
	        if score < box_thresh:
	            continue

	        # Expand the tight box, then re-fit a minimum-area rectangle around it.
	        box = unclip(points).reshape(-1, 1, 2)
	        box, sside = get_mini_boxes(box)
	        if sside < min_size + 2:
	            continue
	        box = np.array(box)

	        # Rescale from bitmap coordinates to the destination image size.
	        box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
	        box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
	        boxes.append(box.astype(np.int16))
	        scores.append(score)
	    return np.array(boxes, dtype=np.int16), scores


	def filter_tag_det_res(dt_boxes, image_shape):
	    """Order, clip and size-filter detected boxes.

	    Each box is reordered with ``order_points_clockwise``, clipped to the
	    image with ``clip_det_res``, and kept only when both its first edge
	    (point 0 to 1) and its last edge (point 0 to 3) are longer than 3 pixels
	    after truncation to int.

	    Returns:
	        ``np.array`` of the boxes that were kept.
	    """
	    img_height, img_width = image_shape[0:2]
	    kept = []
	    for quad in dt_boxes:
	        quad = clip_det_res(order_points_clockwise(quad), img_height, img_width)
	        edge_01 = int(np.linalg.norm(quad[0] - quad[1]))
	        edge_03 = int(np.linalg.norm(quad[0] - quad[3]))
	        if edge_01 > 3 and edge_03 > 3:
	            kept.append(quad)
	    return np.array(kept)


	def order_points_clockwise(pts):
	    """Order four points as [top-left, top-right, bottom-right, bottom-left].

	    Reference: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py

	    The points are split into the two left-most and two right-most by x;
	    inside each pair the point with the smaller y comes first.

	    Returns:
	        float32 array of shape (4, 2).
	    """
	    # Sort by x, then split into the left and right pairs.
	    by_x = pts[np.argsort(pts[:, 0])]
	    left_pair, right_pair = by_x[:2], by_x[2:]

	    # Within each pair, the smaller y is the "top" point.
	    top_left, bottom_left = left_pair[np.argsort(left_pair[:, 1])]
	    top_right, bottom_right = right_pair[np.argsort(right_pair[:, 1])]

	    return np.array([top_left, top_right, bottom_right, bottom_left], dtype="float32")


	def clip_det_res(points, img_height, img_width):
	    """Clamp every point into the image bounds, in place.

	    x is limited to [0, img_width - 1] and y to [0, img_height - 1]; each
	    clamped value is truncated to an int before being written back.

	    Returns:
	        The same ``points`` array that was passed in.
	    """
	    max_x = img_width - 1
	    max_y = img_height - 1
	    for corner in points:
	        # ``corner`` is a row view, so the assignment updates ``points``.
	        corner[0] = int(min(max(corner[0], 0), max_x))
	        corner[1] = int(min(max(corner[1], 0), max_y))
	    return points


	def draw_text_det_res(dt_boxes, img_file):
	    """Draw each detected box as a closed polyline on ``img_file``.

	    The image is drawn on in place (color (255, 255, 0), thickness 2) and
	    the same image object is returned.
	    """
	    for quad in dt_boxes:
	        outline = np.array(quad).astype(np.int32).reshape(-1, 2)
	        cv2.polylines(img_file, [outline], True, color=(255, 255, 0), thickness=2)
	    return img_file


	def sorted_boxes(dt_boxes):
	    """Sort text boxes from top to bottom, left to right.

	    Boxes are first sorted by the (y, x) of their first point. Boxes whose
	    first-point y values differ by less than 10 are then treated as being on
	    the same line and ordered by x.

	    Args:
	        dt_boxes (array): detected text boxes with shape [N, 4, 2].

	    Returns:
	        list of the N boxes (each of shape [4, 2]) in reading order.
	    """
	    num_boxes = dt_boxes.shape[0]
	    ordered = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

	    for i in range(num_boxes - 1):
	        # Bubble box i + 1 leftwards as far as it belongs. A single
	        # adjacent-swap pass is not enough once three or more boxes share a
	        # line, so keep walking back until no swap is needed.
	        for j in range(i, -1, -1):
	            prev_box, next_box = ordered[j], ordered[j + 1]
	            same_line = abs(next_box[0][1] - prev_box[0][1]) < 10
	            if same_line and next_box[0][0] < prev_box[0][0]:
	                ordered[j], ordered[j + 1] = next_box, prev_box
	            else:
	                break
	    return ordered


	def get_rotate_crop_image(img, points):
	    """Crop the quadrilateral ``points`` out of ``img`` with a perspective warp.

	    The output width is the longer of edges 0-1 and 2-3, the output height
	    the longer of edges 0-3 and 1-2 (both truncated to int). If the warped
	    crop is at least 1.5 times taller than wide it is rotated with
	    ``np.rot90``.

	    Args:
	        img: source image array.
	        points: four (x, y) corners, passed directly to
	            ``cv2.getPerspectiveTransform``.

	    Returns:
	        The warped (and possibly rotated) crop.
	    """
	    assert len(points) == 4, "shape of points must be 4*2"

	    edge_01 = np.linalg.norm(points[0] - points[1])
	    edge_23 = np.linalg.norm(points[2] - points[3])
	    edge_03 = np.linalg.norm(points[0] - points[3])
	    edge_12 = np.linalg.norm(points[1] - points[2])
	    crop_w = int(max(edge_01, edge_23))
	    crop_h = int(max(edge_03, edge_12))

	    # Target rectangle the quadrilateral is mapped onto.
	    target = np.float32([[0, 0], [crop_w, 0], [crop_w, crop_h], [0, crop_h]])
	    transform = cv2.getPerspectiveTransform(points, target)
	    warped = cv2.warpPerspective(
	        img,
	        transform,
	        (crop_w, crop_h),
	        borderMode=cv2.BORDER_REPLICATE,
	        flags=cv2.INTER_CUBIC,
	    )

	    warped_h, warped_w = warped.shape[0:2]
	    if warped_h * 1.0 / warped_w >= 1.5:
	        warped = np.rot90(warped)
	    return warped


	## Postprocessing for recognition
	# Configuration for the recognition decoder. The keys other than "name" match
	# the constructor parameters of CTCLabelDecode; "name" is the key that
	# build_post_process pops to pick the class.
	# NOTE(review): presumably passed to build_post_process by the caller - the
	# call site is not in this file; confirm.
	postprocess_params = {
	"name": "CTCLabelDecode",
	"character_type": "ch",
	"character_dict_path": "./fonts/ppocr_keys_v1.txt",
	"use_space_char": True,
	}


	class BaseRecLabelDecode(object):
	    """Convert between text-label and text-index.

	    Builds the character table (``self.character``) and its inverse
	    (``self.dict``) for the requested character type, and decodes index
	    sequences back into text.
	    """

	    def __init__(self, character_dict_path=None, character_type="ch", use_space_char=False):
	        """Build the character table.

	        Args:
	            character_dict_path: UTF-8 file with one character per line;
	                required for every type except "en" and "EN_symbol".
	            character_type: one of the supported type names listed below.
	            use_space_char: append " " to a table loaded from file.
	        """
	        support_character_type = [
	            "ch",
	            "en",
	            "EN_symbol",
	            "french",
	            "german",
	            "japan",
	            "korean",
	            "it",
	            "xi",
	            "pu",
	            "ru",
	            "ar",
	            "ta",
	            "ug",
	            "fa",
	            "ur",
	            "rs",
	            "oc",
	            "rsc",
	            "bg",
	            "uk",
	            "be",
	            "te",
	            "ka",
	            "chinese_cht",
	            "hi",
	            "mr",
	            "ne",
	            "EN",
	            "latin",
	            "arabic",
	            "cyrillic",
	            "devanagari",
	        ]
	        assert character_type in support_character_type, "Only {} are supported now but get {}".format(support_character_type, character_type)

	        self.beg_str = "sos"
	        self.end_str = "eos"

	        if character_type == "en":
	            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
	            dict_character = list(self.character_str)
	        elif character_type == "EN_symbol":
	            # same with ASTER setting (use 94 char).
	            self.character_str = string.printable[:-6]
	            dict_character = list(self.character_str)
	        elif character_type in support_character_type:
	            self.character_str = []
	            assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(character_type)
	            with open(character_dict_path, "rb") as fin:
	                for raw_line in fin:
	                    # Drop the line terminator, whether "\n" or "\r\n".
	                    self.character_str.append(raw_line.decode("utf-8").strip("\r\n"))
	            if use_space_char:
	                self.character_str.append(" ")
	            dict_character = list(self.character_str)
	        else:
	            # Reachable only when assertions are disabled (python -O).
	            raise NotImplementedError
	        self.character_type = character_type
	        dict_character = self.add_special_char(dict_character)
	        self.dict = {char: i for i, char in enumerate(dict_character)}
	        self.character = dict_character

	    def add_special_char(self, dict_character):
	        """Hook for subclasses to extend the character table; returns it unchanged here."""
	        return dict_character

	    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
	        """Convert text-index into text-label.

	        Args:
	            text_index: batch of index sequences.
	            text_prob: optional batch of per-position confidences, indexed
	                like ``text_index``; when omitted every kept character
	                counts as confidence 1.
	            is_remove_duplicate: skip a position whose index equals the
	                index at the previous position.

	        Returns:
	            list of ``(text, mean_confidence)`` tuples, one per sequence. A
	            sequence that decodes to no characters gets confidence 0
	            rather than the nan that averaging an empty list produces.
	        """
	        result_list = []
	        ignored_tokens = self.get_ignored_tokens()
	        for batch_idx, indices in enumerate(text_index):
	            char_list = []
	            conf_list = []
	            for idx, token in enumerate(indices):
	                if token in ignored_tokens:
	                    continue
	                # only for predict
	                if is_remove_duplicate and idx > 0 and indices[idx - 1] == token:
	                    continue
	                char_list.append(self.character[int(token)])
	                conf_list.append(text_prob[batch_idx][idx] if text_prob is not None else 1)
	            if not conf_list:
	                # np.mean([]) would yield nan and emit a RuntimeWarning.
	                conf_list = [0]
	            result_list.append(("".join(char_list), np.mean(conf_list)))
	        return result_list

	    def get_ignored_tokens(self):
	        """Return the indices that ``decode`` skips."""
	        return [0]  # for ctc blank


	class CTCLabelDecode(BaseRecLabelDecode):
	    """Convert between text-label and text-index for CTC-style predictions.

	    Index 0 of the character table is reserved for the "blank" entry added
	    by ``add_special_char``.
	    """

	    def __init__(self, character_dict_path=None, character_type="ch", use_space_char=False, **kwargs):
	        """Forward the table options to the base class; extra kwargs are accepted and ignored."""
	        super(CTCLabelDecode, self).__init__(character_dict_path, character_type, use_space_char)

	    def __call__(self, preds, label=None, *args, **kwargs):
	        """Decode predictions, and optionally labels, into text.

	        Args:
	            preds: paddle.Tensor or numpy array reduced over axis 2
	                (presumably [batch, time, classes] - confirm with caller).
	            label: optional batch of label index sequences to decode as well.

	        Returns:
	            The decoded predictions, or ``(predictions, labels)`` when
	            ``label`` is given. Each entry is ``(text, mean_confidence)``.
	        """
	        if isinstance(preds, paddle.Tensor):
	            preds = preds.numpy()
	        preds_idx = preds.argmax(axis=2)
	        preds_prob = preds.max(axis=2)
	        text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True)
	        if label is None:
	            return text
	        label = self.decode(label)
	        return text, label

	    def add_special_char(self, dict_character):
	        """Prepend the "blank" entry so that it takes index 0."""
	        return ["blank"] + dict_character


	def build_post_process(config):
	    """Instantiate the post-processing class named by ``config["name"]``.

	    ``config`` is deep-copied first, so the caller's dict is not modified.
	    The remaining keys are passed to the class as keyword arguments.
	    """
	    options = copy.deepcopy(config)
	    class_name = options.pop("name")
	    # NOTE(review): eval() resolves the name to an object; only pass trusted
	    # configuration here.
	    factory = eval(class_name)
	    return factory(**options)


	def _glyph_height(font, char):
	    """Return the rendered height of ``char``, on both new and old Pillow."""
	    if hasattr(font, "getbbox"):
	        # getsize() was removed in Pillow 10; the bottom of the bounding box
	        # is used as its replacement for the reported height.
	        return font.getbbox(char)[3]
	    return font.getsize(char)[1]


	def draw_ocr_box_txt(image, boxes, txts, scores=None, drop_score=0.5, font_path="./fonts/simfang.ttf"):
	    """Render OCR results as a side-by-side visualization.

	    The left half is ``image`` blended 50/50 with color-filled boxes; the
	    right half is a white canvas with the box outlines and recognized text.

	    Args:
	        image: PIL image the boxes refer to.
	        boxes: sequence of four-point boxes.
	        txts: recognized text, one entry per box.
	        scores: optional confidences; boxes scoring below ``drop_score``
	            are skipped.
	        drop_score: confidence threshold used with ``scores``.
	        font_path: TrueType font used to draw the text.

	    Returns:
	        numpy array of the combined image, twice as wide as ``image``.
	    """
	    h, w = image.height, image.width
	    img_left = image.copy()
	    img_right = Image.new("RGB", (w, h), (255, 255, 255))

	    # A local generator seeded with 0 gives the same colors on every call
	    # without reseeding NumPy's global random state.
	    rng = np.random.RandomState(0)
	    draw_left = ImageDraw.Draw(img_left)
	    draw_right = ImageDraw.Draw(img_right)
	    for idx, (box, txt) in enumerate(zip(boxes, txts)):
	        if scores is not None and scores[idx] < drop_score:
	            continue
	        color = (
	            rng.randint(0, 255),
	            rng.randint(0, 255),
	            rng.randint(0, 255),
	        )
	        draw_left.polygon(box, fill=color)
	        draw_right.polygon(
	            [
	                box[0][0],
	                box[0][1],
	                box[1][0],
	                box[1][1],
	                box[2][0],
	                box[2][1],
	                box[3][0],
	                box[3][1],
	            ],
	            outline=color,
	        )
	        # Euclidean edge lengths; hypot works in floats, so small integer
	        # coordinate types cannot overflow when squared.
	        box_height = math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1])
	        box_width = math.hypot(box[0][0] - box[1][0], box[0][1] - box[1][1])
	        if box_height > 2 * box_width:
	            # Tall, narrow box: draw the text one character per row.
	            font_size = max(int(box_width * 0.9), 10)
	            font = ImageFont.truetype(font_path, font_size)
	            cur_y = box[0][1]
	            for c in txt:
	                draw_right.text((box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
	                cur_y += _glyph_height(font, c)
	        else:
	            font_size = max(int(box_height * 0.8), 10)
	            font = ImageFont.truetype(font_path, font_size)
	            draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
	    img_left = Image.blend(image, img_left, 0.5)
	    img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
	    img_show.paste(img_left, (0, 0, w, h))
	    img_show.paste(img_right, (w, 0, w * 2, h))
	    return np.array(img_show)