from abc import abstractmethod
from typing import List, Tuple
from collections import Counter

import numpy as np
import cv2

from ..utils import InfererModule, ModelWrapper, Quadrilateral


class CommonDetector(InfererModule):
    async def detect(self, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float, unclip_ratio: float,
                     invert: bool, gamma_correct: bool, rotate: bool, auto_rotate: bool = False, verbose: bool = False):
        '''
        Returns textblock list and text mask.
        '''
        # Apply filters
        img_h, img_w = image.shape[:2]
        orig_image = image.copy()
        minimum_image_size = 400
        # Automatically add a border if the image is too small; small images are
        # padded rather than resized because they are more likely to contain large fonts
        add_border = min(img_w, img_h) < minimum_image_size
        if rotate:
            self.logger.debug('Adding rotation')
            image = self._add_rotation(image)
        if add_border:
            self.logger.debug('Adding border')
            image = self._add_border(image, minimum_image_size)
        if invert:
            self.logger.debug('Adding inversion')
            image = self._add_inversion(image)
        if gamma_correct:
            self.logger.debug('Adding gamma correction')
            image = self._add_gamma_correction(image)
        # if True:
        #     self.logger.debug('Adding histogram equalization')
        #     image = self._add_histogram_equalization(image)
        #     cv2.imwrite('histogram.png', image)

        # Run detection
        textlines, raw_mask, mask = await self._detect(image, detect_size, text_threshold, box_threshold, unclip_ratio, verbose)
        # Discard degenerate detections
        textlines = list(filter(lambda x: x.area > 1, textlines))

        # Remove filters
        if add_border:
            textlines, raw_mask, mask = self._remove_border(image, img_w, img_h, textlines, raw_mask, mask)
        if auto_rotate:
            # Rerun detection on a rotated image if horizontal aspect ratios are
            # prevalent, as that can potentially improve detection
            if len(textlines) > 0:
                orientations = ['h' if txtln.aspect_ratio > 1 else 'v' for txtln in textlines]
                majority_orientation = Counter(orientations).most_common(1)[0][0]
            else:
                majority_orientation = 'h'
            if majority_orientation == 'h':
                self.logger.info('Rerunning detection with 90° rotation')
                # auto_rotate=False on the second pass prevents infinite recursion
                return await self.detect(orig_image, detect_size, text_threshold, box_threshold, unclip_ratio, invert, gamma_correct,
                                         rotate=(not rotate), auto_rotate=False, verbose=verbose)
        if rotate:
            textlines, raw_mask, mask = self._remove_rotation(textlines, raw_mask, mask, img_w, img_h)

        return textlines, raw_mask, mask
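
    # Hedged usage sketch: given a hypothetical concrete subclass `MyDetector`
    # implementing `_detect`, a call would look like the following (parameter
    # values are illustrative, not defaults from this module):
    #
    #     detector = MyDetector()
    #     textlines, raw_mask, mask = await detector.detect(
    #         image, detect_size=2048, text_threshold=0.5, box_threshold=0.7,
    #         unclip_ratio=2.3, invert=False, gamma_correct=False, rotate=False,
    #         auto_rotate=True)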

    @abstractmethod
    async def _detect(self, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float,
                      unclip_ratio: float, verbose: bool = False) -> Tuple[List[Quadrilateral], np.ndarray, np.ndarray]:
        pass

    def _add_border(self, image: np.ndarray, target_side_length: int):
        old_h, old_w = image.shape[:2]
        new_w = new_h = max(old_w, old_h, target_side_length)
        # Pad with black to a square canvas, anchoring the image at the top-left corner
        new_image = np.zeros((new_h, new_w, 3), dtype=np.uint8)
        # new_image[:] = np.array([255, 255, 255], np.uint8)
        x, y = 0, 0
        # x, y = (new_h - old_h) // 2, (new_w - old_w) // 2
        new_image[y:y+old_h, x:x+old_w] = image
        return new_image
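
    # Illustrative example (numbers assumed, not from this module): a 320x250
    # (h x w) image is below minimum_image_size=400, so it is padded to a
    # 400x400 black canvas with the original content at the top-left:
    #
    #     padded = self._add_border(small_image, 400)
    #     # padded.shape == (400, 400, 3); padded[:320, :250] is the original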

    def _remove_border(self, image: np.ndarray, old_w: int, old_h: int, textlines: List[Quadrilateral], raw_mask, mask):
        new_h, new_w = image.shape[:2]
        # Scale the masks to the bordered image size before cropping the border off
        raw_mask = cv2.resize(raw_mask, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        raw_mask = raw_mask[:old_h, :old_w]
        if mask is not None:
            mask = cv2.resize(mask, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
            mask = mask[:old_h, :old_w]

        # Filter out regions that lie entirely within the border and clamp the points of the remaining regions
        new_textlines = []
        for txtln in textlines:
            # The border only extends to the right of and below the original image,
            # so a region starting past either original edge lies fully inside it
            if txtln.xyxy[0] >= old_w or txtln.xyxy[1] >= old_h:
                continue
            points = txtln.pts
            points[:, 0] = np.clip(points[:, 0], 0, old_w)
            points[:, 1] = np.clip(points[:, 1], 0, old_h)
            new_txtln = Quadrilateral(points, txtln.text, txtln.prob)
            new_textlines.append(new_txtln)
        return new_textlines, raw_mask, mask
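
    # Worked example (assumed numbers): with old_w=250, a detection whose
    # leftmost point sits at x=260 lies entirely inside the right border and is
    # dropped, while one spanning x=240..270 is kept with its x coordinates
    # clipped to 250:
    #
    #     pts = [[240, 10], [270, 10], [270, 40], [240, 40]]
    #     # -> clipped to [[240, 10], [250, 10], [250, 40], [240, 40]]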

    def _add_rotation(self, image: np.ndarray):
        # Rotate 90° clockwise; shape (h, w) becomes (w, h)
        return np.rot90(image, k=-1)

    def _remove_rotation(self, textlines, raw_mask, mask, img_w, img_h):
        # Undo the clockwise rotation with a counterclockwise one
        raw_mask = np.ascontiguousarray(np.rot90(raw_mask))
        if mask is not None:
            mask = np.ascontiguousarray(np.rot90(mask).astype(np.uint8))
        for i, txtln in enumerate(textlines):
            # Map points from rotated coordinates back to the original frame:
            # (x, y) = (y_rot, img_h - x_rot)
            rotated_pts = txtln.pts[:, [1, 0]]
            rotated_pts[:, 1] = -rotated_pts[:, 1] + img_h
            textlines[i] = Quadrilateral(rotated_pts, txtln.text, txtln.prob)
        return textlines, raw_mask, mask
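
    # Derivation of the point mapping above: np.rot90(image, k=-1) sends a
    # pixel at (x, y) in an (img_h, img_w) image to
    # (x_rot, y_rot) = (img_h - 1 - y, x) in the rotated image. Inverting this
    # gives x = y_rot and y = img_h - 1 - x_rot; the loop drops the -1, a
    # one-pixel offset that is negligible here.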

    def _add_inversion(self, image: np.ndarray):
        return cv2.bitwise_not(image)

    def _add_gamma_correction(self, image: np.ndarray):
        # Choose gamma so that the mean gray level is mapped to mid gray (127.5)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        mid = 0.5
        mean = np.mean(gray)
        gamma = np.log(mid * 255) / np.log(mean)
        img_gamma = np.power(image, gamma).clip(0, 255).astype(np.uint8)
        return img_gamma
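
    # Worked example of the formula above: for a dark page with mean = 100,
    # gamma = ln(127.5) / ln(100) ≈ 1.053, and indeed 100 ** 1.053 ≈ 127.5,
    # so the mean is pushed to mid gray. The power is applied to raw 0-255
    # values, which is consistent with deriving gamma from their logarithms.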

    def _add_histogram_equalization(self, image: np.ndarray):
        img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
        # Equalize the histogram of the Y (luma) channel only
        img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
        # Convert the YUV image back to BGR format
        img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)
        return img_output

class OfflineDetector(CommonDetector, ModelWrapper):
    _MODEL_SUB_DIR = 'detection'

    async def _detect(self, *args, **kwargs):
        return await self.infer(*args, **kwargs)

    @abstractmethod
    async def _infer(self, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float,
                     unclip_ratio: float, verbose: bool = False):
        pass
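
# Hedged usage sketch, not part of the original module: a stub subclass that
# returns no detections, exercising the filter pipeline in
# `CommonDetector.detect`. It assumes `InfererModule` provides `self.logger`
# and can be instantiated without arguments; parameter values are
# illustrative. Run as a module (e.g. `python -m <package>.detection.common`)
# so the relative import resolves.
if __name__ == '__main__':
    import asyncio

    class _StubDetector(CommonDetector):
        async def _detect(self, image, detect_size, text_threshold, box_threshold, unclip_ratio, verbose=False):
            h, w = image.shape[:2]
            # No detections; an empty raw mask matching the image size
            return [], np.zeros((h, w), np.uint8), None

    image = np.zeros((600, 800, 3), np.uint8)
    textlines, raw_mask, mask = asyncio.run(_StubDetector().detect(
        image, detect_size=2048, text_threshold=0.5, box_threshold=0.7,
        unclip_ratio=2.3, invert=False, gamma_correct=False, rotate=False))
    print(len(textlines), raw_mask.shape)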