|
from abc import abstractmethod |
|
from typing import List, Tuple |
|
from collections import Counter |
|
import numpy as np |
|
import cv2 |
|
|
|
from ..utils import InfererModule, ModelWrapper, Quadrilateral |
|
|
|
|
|
class CommonDetector(InfererModule):
    '''
    Shared pre-/post-processing pipeline around a text detector.

    Subclasses implement `_detect`. `detect` wraps it with optional input
    filters (rotation, border padding, inversion, gamma correction) and then
    maps the detector output back into the frame of the image it was given.
    '''

    async def detect(self, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float, unclip_ratio: float,
                     invert: bool, gamma_correct: bool, rotate: bool, auto_rotate: bool = False, verbose: bool = False):
        '''
        Returns textblock list and text mask.

        Args:
            image: Input image; only `image.shape[:2]` (height, width) is read
                here. The gamma filter additionally converts it with
                `cv2.COLOR_BGR2GRAY`.
            detect_size, text_threshold, box_threshold, unclip_ratio, verbose:
                Forwarded unchanged to `_detect`.
            invert: Apply `_add_inversion` before detection.
            gamma_correct: Apply `_add_gamma_correction` before detection.
            rotate: Rotate the image with `_add_rotation` before detection and
                undo the rotation on the results.
            auto_rotate: After the first pass, rerun detection once with
                `rotate` flipped if most textlines have `aspect_ratio > 1`
                (or if no textlines were found at all).

        Returns:
            Tuple `(textlines, raw_mask, mask)`. `mask` is passed through as
            `None` when `_detect` returns `None` for it.
        '''
        img_h, img_w = image.shape[:2]
        orig_image = image.copy()
        minimum_image_size = 400

        # Images whose shorter side is below the minimum get padded.
        add_border = min(img_w, img_h) < minimum_image_size

        if rotate:
            self.logger.debug('Adding rotation')
            image = self._add_rotation(image)

        # The border is added *after* the rotation, so undoing it has to crop
        # to the size of the (possibly rotated) frame, not of the input image.
        unpadded_h, unpadded_w = image.shape[:2]

        if add_border:
            self.logger.debug('Adding border')
            image = self._add_border(image, minimum_image_size)
        if invert:
            self.logger.debug('Adding inversion')
            image = self._add_inversion(image)
        if gamma_correct:
            self.logger.debug('Adding gamma correction')
            image = self._add_gamma_correction(image)

        # Run detection and discard (near-)degenerate results.
        textlines, raw_mask, mask = await self._detect(image, detect_size, text_threshold, box_threshold, unclip_ratio, verbose)
        textlines = [txtln for txtln in textlines if txtln.area > 1]

        # Undo the filters in reverse order: border first, rotation last.
        if add_border:
            textlines, raw_mask, mask = self._remove_border(image, unpadded_w, unpadded_h, textlines, raw_mask, mask)

        if auto_rotate:
            if textlines:
                orientations = ['h' if txtln.aspect_ratio > 1 else 'v' for txtln in textlines]
                majority_orientation = Counter(orientations).most_common(1)[0][0]
            else:
                majority_orientation = 'h'
            if majority_orientation == 'h':
                self.logger.info('Rerunning detection with 90° rotation')
                # auto_rotate=False guarantees the rerun happens at most once.
                return await self.detect(orig_image, detect_size, text_threshold, box_threshold, unclip_ratio, invert, gamma_correct,
                                         rotate=(not rotate), auto_rotate=False, verbose=verbose)

        if rotate:
            textlines, raw_mask, mask = self._remove_rotation(textlines, raw_mask, mask, img_w, img_h)

        return textlines, raw_mask, mask

    @abstractmethod
    async def _detect(self, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float,
                      unclip_ratio: float, verbose: bool = False) -> Tuple[List[Quadrilateral], np.ndarray, np.ndarray]:
        '''
        Run the actual detector on the already filtered image.

        Must return `(textlines, raw_mask, mask)`; `detect` tolerates `mask`
        being `None`.
        '''
        pass

    def _add_border(self, image: np.ndarray, target_side_length: int):
        '''
        Pad `image` with zeros to a square whose side is the largest of the
        image width, the image height and `target_side_length`.

        The original pixels stay anchored at the top-left corner, so
        coordinates in the padded image equal coordinates in the input.
        The result is always a 3-channel uint8 array.
        '''
        old_h, old_w = image.shape[:2]
        new_w = new_h = max(old_w, old_h, target_side_length)
        # Allocate as uint8 directly instead of creating float64 and casting.
        new_image = np.zeros((new_h, new_w, 3), dtype=np.uint8)
        new_image[:old_h, :old_w] = image
        return new_image

    def _remove_border(self, image: np.ndarray, old_w: int, old_h: int, textlines: List[Quadrilateral], raw_mask, mask):
        '''
        Undo `_add_border` on the detector output.

        Args:
            image: The padded image; its height/width define the padded frame.
            old_w, old_h: Size of the frame before padding.
            textlines: Detected quadrilaterals in padded-frame coordinates.
            raw_mask, mask: Detector masks; `mask` may be `None`.

        Returns:
            `(new_textlines, raw_mask, mask)` where the masks are resized to
            the padded frame and cropped to `old_h` x `old_w`, and the
            textlines are clipped to that region. Textlines that start in the
            padding are dropped.
        '''
        new_h, new_w = image.shape[:2]

        # Bring the masks to the padded frame's resolution before cropping.
        raw_mask = cv2.resize(raw_mask, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
        raw_mask = raw_mask[:old_h, :old_w]
        if mask is not None:
            mask = cv2.resize(mask, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
            mask = mask[:old_h, :old_w]

        new_textlines = []
        for txtln in textlines:
            # Assumes xyxy[0] / xyxy[1] are the box's minimum x / y
            # (TODO confirm against Quadrilateral). Padding only exists to the
            # right of and below the image, so a box starting past either edge
            # lies entirely in the padding and would clip to zero area.
            if txtln.xyxy[0] >= old_w or txtln.xyxy[1] >= old_h:
                continue
            # Clip a copy so the caller's textline objects are not modified.
            points = np.array(txtln.pts)
            points[:, 0] = np.clip(points[:, 0], 0, old_w)
            points[:, 1] = np.clip(points[:, 1], 0, old_h)
            new_textlines.append(Quadrilateral(points, txtln.text, txtln.prob))
        return new_textlines, raw_mask, mask

    def _add_rotation(self, image: np.ndarray):
        '''Return `image` rotated with `np.rot90(image, k=-1)`.'''
        return np.rot90(image, k=-1)

    def _remove_rotation(self, textlines, raw_mask, mask, img_w, img_h):
        '''
        Undo `_add_rotation` on the detector output.

        Masks are rotated back with `np.rot90` (the inverse of `k=-1`).
        Each textline point `(x, y)` becomes `(y, img_h - x)`, where `img_h`
        is the height of the unrotated image. `img_w` is accepted for
        signature symmetry but not used.

        The entries of `textlines` are replaced in place and the same list is
        returned.
        '''
        raw_mask = np.ascontiguousarray(np.rot90(raw_mask))
        if mask is not None:
            mask = np.ascontiguousarray(np.rot90(mask).astype(np.uint8))

        for i, txtln in enumerate(textlines):
            # Fancy indexing yields a new array with the x/y columns swapped.
            rotated_pts = txtln.pts[:, [1, 0]]
            rotated_pts[:, 1] = -rotated_pts[:, 1] + img_h
            textlines[i] = Quadrilateral(rotated_pts, txtln.text, txtln.prob)
        return textlines, raw_mask, mask

    def _add_inversion(self, image: np.ndarray):
        '''Return the bitwise complement of `image`.'''
        return cv2.bitwise_not(image)

    def _add_gamma_correction(self, image: np.ndarray):
        '''
        Apply an automatic gamma curve derived from the mean gray level.

        The exponent is `log(0.5 * 255) / log(mean)`, which maps the mean
        gray value onto mid-gray. The result is clipped to [0, 255] and
        returned as uint8.

        When the mean gray level is <= 1 the exponent would be infinite,
        undefined or negative, so an unchanged copy of the image is returned.
        '''
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        mid = 0.5
        mean = np.mean(gray)
        if mean <= 1:
            # log(mean) <= 0 here: no meaningful gamma exists for an
            # (almost) entirely black image.
            return image.copy()
        gamma = np.log(mid * 255) / np.log(mean)
        img_gamma = np.power(image, gamma).clip(0, 255).astype(np.uint8)
        return img_gamma

    def _add_histogram_equalization(self, image: np.ndarray):
        '''
        Equalize the histogram of the luma channel.

        The image is converted BGR -> YUV, channel 0 is equalized with
        `cv2.equalizeHist`, and the result is converted back to BGR.
        '''
        img_yuv = cv2.cvtColor(image, cv2.COLOR_BGR2YUV)
        img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
        img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)
        return img_output
|
|
|
|
|
class OfflineDetector(CommonDetector, ModelWrapper):
    '''
    Detector variant whose `_detect` step is delegated to `self.infer`.

    `infer` is not defined in this class; presumably it is provided by
    `ModelWrapper` and ends up calling `_infer` (verify against ModelWrapper).
    '''

    _MODEL_SUB_DIR = 'detection'

    async def _detect(self, *args, **kwargs):
        '''Forward every argument unchanged to `self.infer` and return its result.'''
        detection = await self.infer(*args, **kwargs)
        return detection

    @abstractmethod
    async def _infer(self, image: np.ndarray, detect_size: int, text_threshold: float, box_threshold: float,
                     unclip_ratio: float, verbose: bool = False):
        '''
        Model-specific detection step; concrete detectors must override it.

        Takes the same parameters as `CommonDetector._detect`.
        '''
|
|