# Page-status residue captured together with this source: "Spaces: Runtime error" (not part of the program).
import copy
import imghdr
import math
import string
import sys

import cv2
import numpy as np
import paddle
import pyclipper
from paddle.nn import functional as F
from PIL import Image, ImageDraw, ImageFont
from shapely.geometry import Polygon
def DetResizeForTest(data, limit_side_len=960):
    """Resize ``data["image"]`` so that both sides are multiples of 32.

    The network requires input sides that are multiples of 32.  If the longer
    side exceeds ``limit_side_len`` the image is first scaled down (keeping the
    aspect ratio) so that the longer side equals the limit; each side is then
    rounded to the nearest multiple of 32, with a minimum of 32.

    Args:
        data: dict whose ``"image"`` entry is an array of shape ``[h, w, c]``
            (a 2-D ``[h, w]`` array is accepted as well).
        limit_side_len: maximum length allowed for the longer side before the
            rounding step.

    Returns:
        The same ``data`` dict, with ``"image"`` replaced by the resized image
        and ``"shape"`` set to ``np.array([src_h, src_w, ratio_h, ratio_w])``,
        where the ratios are resized size divided by source size.

    Raises:
        Whatever ``cv2.resize`` raises; the offending sizes are printed first.
    """
    img = data["image"]
    src_h, src_w = img.shape[:2]

    # Only shrink: images already within the limit keep ratio 1.0.
    longest_side = max(src_h, src_w)
    if longest_side > limit_side_len:
        ratio = float(limit_side_len) / longest_side
    else:
        ratio = 1.0

    # Both targets are clamped to >= 32, so they can never be <= 0.
    resize_h = max(int(round(int(src_h * ratio) / 32) * 32), 32)
    resize_w = max(int(round(int(src_w * ratio) / 32) * 32), 32)

    try:
        img = cv2.resize(img, (resize_w, resize_h))
    except Exception:
        # Keep the diagnostic output, but propagate the failure instead of
        # terminating the whole process with a "success" exit status.
        print(img.shape, resize_w, resize_h)
        raise

    ratio_h = resize_h / float(src_h)
    ratio_w = resize_w / float(src_w)
    data["image"] = img
    data["shape"] = np.array([src_h, src_w, ratio_h, ratio_w])
    return data
def NormalizeImage(
    data,
    scale=1.0 / 255.0,
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
):
    """Normalize ``data["image"]``: scale, subtract the mean, divide by std.

    Args:
        data: dict whose ``"image"`` entry is a ``numpy.ndarray`` or a
            ``PIL.Image.Image``; the mean/std are broadcast as ``(1, 1, 3)``.
        scale: factor applied to the pixel values before mean subtraction.
        mean: three per-channel means.
        std: three per-channel standard deviations.

    Returns:
        The same ``data`` dict with ``"image"`` replaced by the float32 result
        of ``(img * scale - mean) / std``.

    Raises:
        AssertionError: if the image is neither an ndarray nor a PIL image.
    """
    img = data["image"]
    if not isinstance(img, np.ndarray):
        # PIL is only needed (and only imported) for non-ndarray input.
        from PIL import Image

        if not isinstance(img, Image.Image):
            # Raised explicitly rather than via ``assert`` so the check is not
            # stripped under ``python -O``; the exception type is unchanged.
            raise AssertionError("invalid input 'img' in NormalizeImage")
        img = np.array(img)

    channel_shape = (1, 1, 3)
    mean_arr = np.array(mean).reshape(channel_shape).astype("float32")
    std_arr = np.array(std).reshape(channel_shape).astype("float32")

    data["image"] = (img.astype("float32") * scale - mean_arr) / std_arr
    return data
def unclip(box, unclip_ratio=2.0):
    """Expand a polygon outward using a pyclipper offset.

    The offset distance is ``area * unclip_ratio / perimeter`` of ``box``.

    Args:
        box: sequence of ``(x, y)`` vertices accepted by both
            ``shapely.geometry.Polygon`` and ``PyclipperOffset.AddPath``.
        unclip_ratio: expansion factor; larger values grow the polygon more.

    Returns:
        ``np.array`` built from the path list returned by
        ``PyclipperOffset.Execute``.
    """
    poly = Polygon(box)
    distance = poly.area * unclip_ratio / poly.length

    offset = pyclipper.PyclipperOffset()
    # Rounded joins on a closed polygon.
    offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
    return np.array(offset.Execute(distance))
def get_mini_boxes(contour):
    """Return the ordered corners of the minimum-area rectangle of a contour.

    The four corners from ``cv2.boxPoints`` are sorted by x; within the two
    left-most and the two right-most corners, the one with the smaller y comes
    first.  The result order is: left/small-y, right/small-y, right/large-y,
    left/large-y.

    Returns:
        ``(box, short_side)`` where ``box`` is the list of four corner points
        and ``short_side`` is the smaller of the rectangle's two side lengths.
    """
    rect = cv2.minAreaRect(contour)
    corners = sorted(list(cv2.boxPoints(rect)), key=lambda pt: pt[0])

    # Left pair: corners[0], corners[1]; right pair: corners[2], corners[3].
    if corners[1][1] > corners[0][1]:
        left_first, left_last = corners[0], corners[1]
    else:
        left_first, left_last = corners[1], corners[0]

    if corners[3][1] > corners[2][1]:
        right_first, right_last = corners[2], corners[3]
    else:
        right_first, right_last = corners[3], corners[2]

    ordered = [left_first, right_first, right_last, left_last]
    return ordered, min(rect[1])
def box_score_fast(bitmap, _box):
    """Score a box as the mean of ``bitmap`` inside the box polygon.

    Only the axis-aligned bounding region of the box (clipped to the bitmap)
    is cropped; a mask filled with the polygon selects the pixels averaged.
    ``_box`` is copied, so the caller's array is left untouched.
    """
    rows, cols = bitmap.shape[:2]
    poly = _box.copy()

    # Integer bounding region, clipped to valid pixel indices.
    x_lo = np.clip(np.floor(poly[:, 0].min()).astype(np.int32), 0, cols - 1)
    x_hi = np.clip(np.ceil(poly[:, 0].max()).astype(np.int32), 0, cols - 1)
    y_lo = np.clip(np.floor(poly[:, 1].min()).astype(np.int32), 0, rows - 1)
    y_hi = np.clip(np.ceil(poly[:, 1].max()).astype(np.int32), 0, rows - 1)

    region_mask = np.zeros((y_hi - y_lo + 1, x_hi - x_lo + 1), dtype=np.uint8)

    # Shift the polygon into the cropped region's coordinate frame.
    poly[:, 0] = poly[:, 0] - x_lo
    poly[:, 1] = poly[:, 1] - y_lo
    cv2.fillPoly(region_mask, poly.reshape(1, -1, 2).astype(np.int32), 1)

    region = bitmap[y_lo : y_hi + 1, x_lo : x_hi + 1]
    return cv2.mean(region, region_mask)[0]
def boxes_from_bitmap(pred, _bitmap, dest_width, dest_height):
    """Extract scored text boxes from a binarized map.

    Args:
        pred: score map passed to ``box_score_fast`` for each candidate box.
        _bitmap: 2-D map of shape ``(H, W)`` (it is unpacked into exactly two
            dimensions) whose values are binarized as {0, 1}.
        dest_width: width of the coordinate space the boxes are rescaled to.
        dest_height: height of the coordinate space the boxes are rescaled to.

    Returns:
        ``(boxes, scores)``: an int16 array of the kept boxes (four points
        each) and the list of their scores, in matching order.
    """
    max_candidates = 1000  # at most this many contours are examined
    box_thresh = 0.7  # candidates scoring below this are dropped
    min_size = 3  # minimum short-side length of the contour rectangle

    bitmap = _bitmap
    height, width = bitmap.shape

    outs = cv2.findContours(
        (bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    # findContours returns (image, contours, hierarchy) or (contours,
    # hierarchy) depending on the OpenCV version; contours is second-to-last.
    contours = outs[-2]

    boxes = []
    scores = []
    for contour in contours[:max_candidates]:
        points, sside = get_mini_boxes(contour)
        if sside < min_size:
            continue

        points = np.array(points)
        score = box_score_fast(pred, points.reshape(-1, 2))
        if box_thresh > score:
            continue

        # Expand the box, then re-fit a rectangle around the expanded shape.
        box = unclip(points).reshape(-1, 1, 2)
        box, sside = get_mini_boxes(box)
        if sside < min_size + 2:
            continue

        # Rescale from bitmap coordinates to the destination size.
        box = np.array(box)
        box[:, 0] = np.clip(np.round(box[:, 0] / width * dest_width), 0, dest_width)
        box[:, 1] = np.clip(np.round(box[:, 1] / height * dest_height), 0, dest_height)
        # NOTE(review): int16 cannot represent coordinates above 32767.
        boxes.append(box.astype(np.int16))
        scores.append(score)

    return np.array(boxes, dtype=np.int16), scores
def filter_tag_det_res(dt_boxes, image_shape):
    """Order, clip and size-filter detected boxes.

    Each box is put into clockwise corner order, clipped to the image bounds,
    and kept only if both its first edge (corner 0 to 1) and its last edge
    (corner 0 to 3) are longer than 3 after truncation to int.

    Returns:
        ``np.array`` of the kept boxes.
    """
    img_height, img_width = image_shape[0:2]

    kept = []
    for raw_box in dt_boxes:
        quad = order_points_clockwise(raw_box)
        quad = clip_det_res(quad, img_height, img_width)
        edge_w = int(np.linalg.norm(quad[0] - quad[1]))
        edge_h = int(np.linalg.norm(quad[0] - quad[3]))
        if edge_w > 3 and edge_h > 3:
            kept.append(quad)

    return np.array(kept)
def order_points_clockwise(pts):
    """Order four points as top-left, top-right, bottom-right, bottom-left.

    Reference: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py

    The points are sorted by x to separate the two left-most from the two
    right-most; each pair is then sorted by y so the smaller-y point is the
    "top" one.

    Returns:
        float32 array ``[tl, tr, br, bl]``.
    """
    by_x = pts[pts[:, 0].argsort(), :]
    left_pair = by_x[:2, :]
    right_pair = by_x[2:, :]

    top_left, bottom_left = left_pair[left_pair[:, 1].argsort(), :]
    top_right, bottom_right = right_pair[right_pair[:, 1].argsort(), :]

    return np.array([top_left, top_right, bottom_right, bottom_left], dtype="float32")
def clip_det_res(points, img_height, img_width):
    """Clamp every point into the image, in place, and return ``points``.

    x is limited to ``[0, img_width - 1]`` and y to ``[0, img_height - 1]``;
    each clamped value is truncated with ``int`` before being stored back.
    """
    max_x = img_width - 1
    max_y = img_height - 1
    # Rows are views into ``points``, so the assignments update the input.
    for row in points:
        row[0] = int(min(max(row[0], 0), max_x))
        row[1] = int(min(max(row[1], 0), max_y))
    return points
def draw_text_det_res(dt_boxes, img_file):
    """Draw each detected box as a closed polyline on the image.

    The image passed as ``img_file`` is drawn on directly and returned.
    """
    canvas = img_file
    outlines = (np.array(b).astype(np.int32).reshape(-1, 2) for b in dt_boxes)
    for outline in outlines:
        cv2.polylines(canvas, [outline], True, color=(255, 255, 0), thickness=2)
    return canvas
def sorted_boxes(dt_boxes):
    """Sort text boxes from top to bottom, then left to right.

    Boxes are first sorted by the (y, x) of their first corner.  Boxes whose
    first-corner y values differ by less than 10 are treated as being on the
    same line and are then ordered by x.  Each box is bubbled backwards as far
    as needed, so a line with three or more boxes ends up fully ordered (a
    single adjacent-swap pass does not guarantee that).

    Args:
        dt_boxes: detected text boxes, indexable as ``box[0][1]`` /
            ``box[0][0]`` (e.g. an array of shape ``[N, 4, 2]``).

    Returns:
        list of the boxes in reading order.
    """
    _boxes = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

    for i in range(len(_boxes) - 1):
        for j in range(i, -1, -1):
            same_line = abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
            if same_line and _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                # Everything before j is already in order.
                break
    return _boxes
def get_rotate_crop_image(img, points):
    """Crop a quadrilateral region and warp it to an upright rectangle.

    The output width is the longer of the edges 0-1 and 2-3, the output height
    the longer of the edges 0-3 and 1-2 (both truncated to int).  The four
    ``points`` are mapped onto the rectangle corners with a perspective
    transform.  If the result is at least 1.5 times as tall as it is wide, it
    is rotated with ``np.rot90``.

    Raises:
        AssertionError: if ``points`` does not contain exactly four entries.
    """
    assert len(points) == 4, "shape of points must be 4*2"

    edge_top = np.linalg.norm(points[0] - points[1])
    edge_bottom = np.linalg.norm(points[2] - points[3])
    edge_left = np.linalg.norm(points[0] - points[3])
    edge_right = np.linalg.norm(points[1] - points[2])
    crop_w = int(max(edge_top, edge_bottom))
    crop_h = int(max(edge_left, edge_right))

    target_corners = np.float32([[0, 0], [crop_w, 0], [crop_w, crop_h], [0, crop_h]])
    transform = cv2.getPerspectiveTransform(points, target_corners)
    warped = cv2.warpPerspective(
        img,
        transform,
        (crop_w, crop_h),
        borderMode=cv2.BORDER_REPLICATE,
        flags=cv2.INTER_CUBIC,
    )

    out_h, out_w = warped.shape[0:2]
    if out_h * 1.0 / out_w >= 1.5:
        warped = np.rot90(warped)
    return warped
# Post-processing configuration for recognition.
# "name" selects the decoder class; the remaining keys are its keyword arguments.
postprocess_params = {
    "name": "CTCLabelDecode",
    "character_type": "ch",
    "character_dict_path": "./fonts/ppocr_keys_v1.txt",
    "use_space_char": True,
}
class BaseRecLabelDecode(object):
    """Convert between text-label and text-index."""

    def __init__(self, character_dict_path=None, character_type="ch", use_space_char=False):
        """Build the character table.

        Args:
            character_dict_path: path to a UTF-8 file with one character per
                line; required for every type except "en" and "EN_symbol".
            character_type: one of the supported type names listed below.
            use_space_char: when the table is read from a file, append " ".

        Raises:
            AssertionError: unsupported ``character_type``, or a missing
                ``character_dict_path`` for a type that needs one.
        """
        support_character_type = [
            "ch",
            "en",
            "EN_symbol",
            "french",
            "german",
            "japan",
            "korean",
            "it",
            "xi",
            "pu",
            "ru",
            "ar",
            "ta",
            "ug",
            "fa",
            "ur",
            "rs",
            "oc",
            "rsc",
            "bg",
            "uk",
            "be",
            "te",
            "ka",
            "chinese_cht",
            "hi",
            "mr",
            "ne",
            "EN",
            "latin",
            "arabic",
            "cyrillic",
            "devanagari",
        ]
        # Raised explicitly (not via ``assert``) so the validation also runs
        # under ``python -O``; the exception type is unchanged.
        if character_type not in support_character_type:
            raise AssertionError(
                "Only {} are supported now but get {}".format(support_character_type, character_type)
            )

        self.beg_str = "sos"
        self.end_str = "eos"

        if character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
        elif character_type == "EN_symbol":
            # same with ASTER setting (use 94 char).
            self.character_str = string.printable[:-6]
        else:
            if character_dict_path is None:
                raise AssertionError(
                    "character_dict_path should not be None when character_type is {}".format(character_type)
                )
            self.character_str = []
            with open(character_dict_path, "rb") as fin:
                for line in fin:
                    # Strip only line terminators; a line holding " " is kept.
                    self.character_str.append(line.decode("utf-8").strip("\r\n"))
            if use_space_char:
                self.character_str.append(" ")

        self.character_type = character_type
        dict_character = self.add_special_char(list(self.character_str))
        # Later duplicates of a character overwrite earlier indices.
        self.dict = {char: i for i, char in enumerate(dict_character)}
        self.character = dict_character

    def add_special_char(self, dict_character):
        """Hook for subclasses to add special tokens; the base adds none."""
        return dict_character

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """Convert text-index into text-label.

        Args:
            text_index: batch of index sequences.
            text_prob: optional batch of per-position confidences aligned with
                ``text_index``; when omitted every kept position counts as 1.
            is_remove_duplicate: skip a position whose index equals the index
                at the previous position (only for predict).

        Returns:
            list with one ``(text, mean_confidence)`` tuple per sequence.  A
            sequence in which nothing is kept yields ``("", 0.0)`` rather than
            the ``nan`` that averaging an empty list would produce.
        """
        result_list = []
        ignored_tokens = self.get_ignored_tokens()
        for batch_idx, indices in enumerate(text_index):
            char_list = []
            conf_list = []
            for idx, token in enumerate(indices):
                if token in ignored_tokens:
                    continue
                if is_remove_duplicate and idx > 0 and indices[idx - 1] == token:
                    continue
                char_list.append(self.character[int(token)])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:
                    conf_list.append(1)
            if not conf_list:
                conf_list = [0]
            result_list.append(("".join(char_list), np.mean(conf_list)))
        return result_list

    def get_ignored_tokens(self):
        """Return the indices skipped while decoding."""
        return [0]  # for ctc blank
class CTCLabelDecode(BaseRecLabelDecode):
    """Convert between text-label and text-index"""

    def __init__(self, character_dict_path=None, character_type="ch", use_space_char=False, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super().__init__(character_dict_path, character_type, use_space_char)

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode predictions (and optionally labels) into text.

        ``preds`` is converted with ``.numpy()`` when it is a paddle Tensor.
        The index and confidence at each position are the argmax and max along
        axis 2; consecutive duplicate indices are removed while decoding.

        Returns:
            The decoded predictions, or ``(decoded_predictions,
            decoded_label)`` when ``label`` is given.
        """
        scores = preds.numpy() if isinstance(preds, paddle.Tensor) else preds
        best_idx = scores.argmax(axis=2)
        best_prob = scores.max(axis=2)
        decoded = self.decode(best_idx, best_prob, is_remove_duplicate=True)
        if label is not None:
            return decoded, self.decode(label)
        return decoded

    def add_special_char(self, dict_character):
        """Prepend the "blank" token so it takes index 0."""
        return ["blank", *dict_character]
def build_post_process(config):
    """Instantiate the post-processing object described by ``config``.

    ``config["name"]`` names the class; every other entry is passed to it as a
    keyword argument.  ``config`` itself is left unmodified (a deep copy is
    consumed).
    """
    params = copy.deepcopy(config)
    module_name = params.pop("name")
    # NOTE(review): eval() executes arbitrary expressions; ``config`` must
    # never come from untrusted input.
    factory = eval(module_name)
    return factory(**params)
def _ocr_char_height(font, char):
    """Return the height used to advance past ``char`` when drawn in ``font``."""
    # ``FreeTypeFont.getsize`` was removed in Pillow 10; the bottom coordinate
    # of ``getbbox`` is the same value.  Fall back for Pillow without getbbox.
    if hasattr(font, "getbbox"):
        return font.getbbox(char)[3]
    return font.getsize(char)[1]


def draw_ocr_box_txt(image, boxes, txts, scores=None, drop_score=0.5, font_path="./fonts/simfang.ttf"):
    """Render detection boxes and recognized text side by side.

    The left half is ``image`` blended 50/50 with a copy on which each box is
    filled with a colour; the right half is a white canvas with each box
    outlined in the same colour and its text drawn inside.  Boxes more than
    twice as tall as wide get their text drawn one character per row.

    Args:
        image: PIL image (``height``, ``width`` and ``copy`` are used).
        boxes: boxes of four ``(x, y)`` corners each.
        txts: recognized strings, aligned with ``boxes``.
        scores: optional confidences aligned with ``boxes``.
        drop_score: entries whose score is below this value are skipped.
        font_path: TrueType font used for the text.

    Returns:
        ``np.array`` of an RGB image twice as wide as ``image``.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new("RGB", (w, h), (255, 255, 255))

    # Fixed seed so the box colours are the same on every call.
    np.random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    fonts = {}  # font size -> loaded font, so each size is loaded once

    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and scores[idx] < drop_score:
            continue
        color = (
            np.random.randint(0, 255),
            np.random.randint(0, 255),
            np.random.randint(0, 255),
        )
        draw_left.polygon(box, fill=color)
        draw_right.polygon(
            [
                box[0][0],
                box[0][1],
                box[1][0],
                box[1][1],
                box[2][0],
                box[2][1],
                box[3][0],
                box[3][1],
            ],
            outline=color,
        )
        box_height = math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][1]) ** 2)
        box_width = math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][1]) ** 2)

        is_vertical = box_height > 2 * box_width
        if is_vertical:
            font_size = max(int(box_width * 0.9), 10)
        else:
            font_size = max(int(box_height * 0.8), 10)
        if font_size not in fonts:
            fonts[font_size] = ImageFont.truetype(font_path, font_size)
        font = fonts[font_size]

        if is_vertical:
            # Stack the characters downwards from the first corner.
            cur_y = box[0][1]
            for c in txt:
                draw_right.text((box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += _ocr_char_height(font, c)
        else:
            draw_right.text([box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new("RGB", (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)