|
import os |
|
|
|
import numpy as np |
|
from scipy.stats import mode |
|
|
|
from utils import have_cv2, have_pillow |
|
from enums import images_num_max_dict |
|
|
|
|
|
def largest_contour(contours):
    """Return the contour with the greatest area.

    Areas are measured with ``cv2.contourArea``.  A contour only replaces the
    current best when its area is strictly larger, so the first of several
    equally large contours wins and ``None`` is returned when ``contours`` is
    empty or no contour has an area above zero.
    """
    import cv2

    best = None
    best_area = 0
    for candidate in contours:
        candidate_area = cv2.contourArea(candidate)
        if candidate_area > best_area:
            best, best_area = candidate, candidate_area
    return best
|
|
|
|
|
def is_contour_acceptable(contour, image, size_threshold=0.1, aspect_ratio_range=(0.5, 2), rotation_threshold=30):
    """Check if the contour is acceptable based on size, aspect ratio, and rotation.

    :param contour: contour to test (as accepted by ``cv2.contourArea``).
    :param image: image array; only ``image.shape[0]`` and ``image.shape[1]`` are read.
    :param size_threshold: the contour area, as a fraction of the image area,
        must lie within ``[size_threshold, 1 - size_threshold]``.
    :param aspect_ratio_range: ``(min, max)`` allowed width/height ratio of the
        contour's upright bounding rectangle.
    :param rotation_threshold: maximum allowed tilt, in degrees, of the
        contour's minimum-area rectangle away from the nearest image axis.
    :return: True when every check passes, otherwise False.
    """
    import cv2

    image_area = image.shape[0] * image.shape[1]
    if image_area == 0:
        # Nothing can be a meaningful fraction of an empty image.
        return False
    area_fraction = cv2.contourArea(contour) / image_area
    if area_fraction < size_threshold or area_fraction > 1 - size_threshold:
        return False

    _, _, w, h = cv2.boundingRect(contour)
    if h == 0:
        return False
    aspect_ratio = w / h
    if aspect_ratio < aspect_ratio_range[0] or aspect_ratio > aspect_ratio_range[1]:
        return False

    _, _, angle = cv2.minAreaRect(contour)
    # The angle convention of cv2.minAreaRect differs between OpenCV releases
    # (older ones report values in [-90, 0), newer ones in (0, 90]), so the
    # raw value cannot be compared to the threshold directly: an upright
    # rectangle may be reported as 90.  Fold it into the tilt from the nearest
    # axis, which lies in [0, 45] regardless of convention.
    tilt = abs(angle) % 90
    tilt = min(tilt, 90 - tilt)
    if tilt > rotation_threshold:
        return False

    return True
|
|
|
|
|
def file_to_cv2(img_file):
    """Load an image file with ``cv2.imread``, falling back to Pillow.

    When ``cv2.imread`` cannot decode the file, the image is opened with
    Pillow, converted to RGB, saved next to the input as
    ``<img_file>.pil.png`` and that copy is read with ``cv2.imread`` instead.

    :param img_file: path of the image file to load.
    :return: the image as returned by ``cv2.imread``.
    :raises AssertionError: if ``img_file`` is not an existing file.
    :raises ValueError: if no image could be produced by either route.
    """
    # Validate before touching the decoder.  Raised explicitly (instead of via
    # an ``assert`` statement) so the check is not stripped under ``python -O``;
    # the exception type is kept for callers that already handle it.
    if not os.path.isfile(img_file):
        raise AssertionError('%s not found' % img_file)

    import cv2

    image = cv2.imread(img_file)
    if image is None:
        from PIL import Image

        pil_image_file = img_file + '.pil.png'
        # Context manager closes the source file handle promptly.
        with Image.open(img_file) as pil_image:
            pil_image.convert('RGB').save(pil_image_file)
        image = cv2.imread(pil_image_file)

    if image is None:
        raise ValueError("Error: Image for %s not made." % img_file)
    return image
|
|
|
|
|
def align_image(img_file):
    """Try to perspective-correct the dominant quadrilateral in an image file.

    The image is converted to grayscale, blurred, edge-detected with Canny and
    its contours are extracted.  If the largest contour passes
    ``is_contour_acceptable`` and simplifies to four points, the image is
    warped with ``four_point_transform`` and written to
    ``<img_file>_aligned.jpg``.

    This is best-effort: any exception raised while processing is printed and
    the original path is returned.

    :param img_file: path of the image file to align.
    :return: path of the aligned image, or ``img_file`` unchanged when no
        alignment was performed.
    """
    import cv2
    from imutils.perspective import four_point_transform

    try:
        image = file_to_cv2(img_file)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blur, 50, 150, apertureSize=3)

        # findContours returns (contours, hierarchy) in OpenCV 4.x but
        # (image, contours, hierarchy) in 3.x; [-2] selects the contours in both.
        contours = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

        largest = largest_contour(contours)
        if largest is None or not is_contour_acceptable(largest, image):
            print("No acceptable contours found.")
            return img_file

        # Simplify the contour; only a four-point outline can be warped.
        peri = cv2.arcLength(largest, True)
        approx = cv2.approxPolyDP(largest, 0.02 * peri, True)
        if len(approx) != 4:
            print("Contour is not a quadrilateral.")
            return img_file

        warped = four_point_transform(image, approx.reshape(4, 2))
        out_file = img_file + "_aligned.jpg"
        if not cv2.imwrite(out_file, warped):
            # Nothing was written; do not hand back a path to a missing file.
            return img_file
        return out_file
    except Exception as e:
        print("Error in align_image:", e, flush=True)
        return img_file
|
|
|
|
|
def correct_rotation(img_file, border_size=50):
    """Deskew an image file using the dominant direction of its straight lines.

    Line segments are detected with a probabilistic Hough transform on the
    Canny edges.  The most frequent segment angle (rounded to whole degrees)
    is folded into [-45, 45] and the image is rotated by that angle about its
    centre.  The result is written to ``<img_file>_rotated.jpg``.

    :param img_file: path of the image file to correct.
    :param border_size: width of the border that would be cropped after
        rotation; currently unused because the final crop is disabled below.
    :return: path of the rotated image, or ``img_file`` unchanged when no line
        segments were found or the result could not be written.
    """
    import cv2

    image = file_to_cv2(img_file)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80, minLineLength=100, maxLineGap=10)
    if lines is None or len(lines) == 0:
        return img_file

    # Each entry of ``lines`` wraps one (x1, y1, x2, y2) segment.
    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        angles.append(np.degrees(np.arctan2(y2 - y1, x2 - x1)))

    # scipy.stats.mode returns ``.mode`` as a length-1 array on older SciPy
    # releases and as a scalar on newer ones; normalise to a plain float so
    # the arithmetic and the OpenCV call below behave the same on both.
    most_frequent_angle = float(np.ravel(mode(np.round(angles)).mode)[0])

    # Fold into [-45, 45] so near-vertical lines are treated like
    # near-horizontal ones.
    if most_frequent_angle < -45:
        most_frequent_angle += 90
    elif most_frequent_angle > 45:
        most_frequent_angle -= 90

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, most_frequent_angle, 1.0)
    corrected_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # Developer switch: cropping the border after rotation is disabled.
    remove_border_final = False
    if remove_border_final:
        cropped_rotated_image = corrected_image[border_size:-border_size, border_size:-border_size]
    else:
        cropped_rotated_image = corrected_image

    out_file = img_file + "_rotated.jpg"
    if not cv2.imwrite(out_file, cropped_rotated_image):
        # Nothing was written; do not hand back a path to a missing file.
        return img_file

    return out_file
|
|
|
|
|
def pad_resize_image_file(img_file, relaxed_resize=False):
    """Resize (and possibly pad) an image file, writing the result beside it.

    With ``relaxed_resize`` the image goes through ``resize_image`` with a
    maximum dimension of 2048 and the output suffix is ``_resized.png``;
    otherwise it goes through ``pad_resize_image`` and the suffix is
    ``_pad_resized.png``.  When the helper reports no change (returns
    ``None``) nothing is written.

    :param img_file: path of the image file to process.
    :param relaxed_resize: choose plain down-scaling instead of pad/resize.
    :return: path of the written file, or ``img_file`` if nothing changed.
    """
    import cv2

    loaded = file_to_cv2(img_file)

    if not relaxed_resize:
        suffix = "_pad_resized.png"
        processed = pad_resize_image(loaded, return_none_if_no_change=True)
    else:
        suffix = "_resized.png"
        processed = resize_image(loaded, return_none_if_no_change=True, max_dimension=2048)

    if processed is None:
        return img_file

    out_path = img_file + suffix
    cv2.imwrite(out_path, processed)
    return out_path
|
|
|
|
|
def resize_image(image, return_none_if_no_change=True, max_dimension=2048):
    """Scale an image down so its longest side is at most ``max_dimension``.

    The aspect ratio is preserved.  Images that already fit are never scaled up.

    :param image: image array; height and width are read from ``image.shape[:2]``.
    :param return_none_if_no_change: when the image already fits, return
        ``None`` instead of the unchanged image.
    :param max_dimension: largest allowed height or width in pixels.
    :return: the resized image, the original image, or ``None`` (see above).
    """
    height, width = image.shape[:2]
    longest_side = max(height, width)

    if longest_side <= max_dimension:
        return None if return_none_if_no_change else image

    # Imported only on the path that actually needs OpenCV.
    import cv2

    scale_factor = max_dimension / longest_side
    # Clamp to one pixel: for very elongated images the short side can
    # truncate to 0, which cv2.resize rejects.
    new_dimensions = (max(1, int(width * scale_factor)), max(1, int(height * scale_factor)))
    return cv2.resize(image, new_dimensions, interpolation=cv2.INTER_AREA)
|
|
|
|
|
def pad_resize_image(image, return_none_if_no_change=False, max_dimension=1024):
    """Fit an image into a ``max_dimension`` x ``max_dimension`` square.

    Behaviour by input size:

    * exactly the target size: returned unchanged (or ``None``, see below);
    * smaller than the target in both directions: centred on a black canvas
      of the target size;
    * larger than the target in both directions: scaled down, preserving the
      aspect ratio, so that it fits inside the target -- no padding is added
      in this case;
    * anything else: scaled to fit inside the target, preserving the aspect
      ratio, then centred on a black canvas of the target size.

    :param image: image array; height and width are read from ``image.shape``.
    :param return_none_if_no_change: when the image already has the target
        size, return ``None`` instead of the image.
    :param max_dimension: side length of the square target in pixels.
    :return: the processed image, the original image, or ``None``.
    """
    target_w = target_h = max_dimension
    src_w, src_h = image.shape[1], image.shape[0]

    if src_w == target_w and src_h == target_h:
        return None if return_none_if_no_change else image

    # Imported only on the paths that actually need OpenCV.
    import cv2

    def _pad_to_target(img):
        # Centre ``img`` on a black canvas; when the gap is odd the extra
        # pixel goes to the bottom/right so the result is exactly the target
        # size (symmetric ``// 2`` padding would come up one pixel short).
        gap_x = target_w - img.shape[1]
        gap_y = target_h - img.shape[0]
        left, top = gap_x // 2, gap_y // 2
        return cv2.copyMakeBorder(img, top, gap_y - top, left, gap_x - left, cv2.BORDER_CONSTANT,
                                  value=[0, 0, 0])

    if src_w < target_w and src_h < target_h:
        return _pad_to_target(image)

    # Scale so the image fits inside the target while keeping its proportions.
    aspect_ratio_original = src_w / src_h
    aspect_ratio_final = target_w / target_h
    if aspect_ratio_original < aspect_ratio_final:
        # Relatively taller than the target: height is the limiting side.
        new_height = target_h
        new_width = max(1, int(target_h * aspect_ratio_original))
    else:
        # Relatively wider than the target: width is the limiting side.
        new_width = target_w
        new_height = max(1, int(target_w / aspect_ratio_original))
    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

    if src_w > target_w and src_h > target_h:
        # Pure down-scale case: returned without padding.
        return image

    return _pad_to_target(image)
|
|
|
|
|
def fix_image_file(file, do_align=False, do_rotate=False, do_pad=False, relaxed_resize=False):
    """Run the requested clean-up steps on an image file.

    Steps run in the order align, rotate, pad/resize, each one working on the
    output of the previous step.  Nothing is done when ``have_cv2`` is false.

    :param file: path of the image file to process.
    :param do_align: run ``align_image``.
    :param do_rotate: run ``correct_rotation``.
    :param do_pad: run ``pad_resize_image_file``.
    :param relaxed_resize: also triggers ``pad_resize_image_file`` and is
        forwarded to it.
    :return: path of the final image file (``file`` itself if unchanged).
    """
    if not have_cv2:
        return file

    if do_align:
        aligned_path = align_image(file)
        # Only adopt the step's output if it really exists on disk.
        if aligned_path is not None and os.path.isfile(aligned_path):
            file = aligned_path

    if do_rotate:
        rotated_path = correct_rotation(file)
        if rotated_path is not None and os.path.isfile(rotated_path):
            file = rotated_path

    if do_pad or relaxed_resize:
        file = pad_resize_image_file(file, relaxed_resize=relaxed_resize)

    return file
|
|
|
|
|
def get_image_types():
    """List the file extensions Pillow can open, without the leading dot.

    Extensions come from ``Image.registered_extensions()`` and are kept only
    when their format appears in ``Image.OPEN``.  They are sorted before the
    leading dot is removed.  An empty list is returned when ``have_pillow``
    is false.
    """
    if not have_pillow:
        return []

    from PIL import Image

    openable = set()
    for extension, fmt in Image.registered_extensions().items():
        if fmt in Image.OPEN:
            openable.add(extension)

    return [extension[1:] if extension.startswith('.') else extension
            for extension in sorted(openable)]
|
|
|
|
|
def get_image_file(image_file, image_control, document_choice, base_model=None, images_num_max=None,
                   image_resolution=None, image_format=None,
                   convert=False,
                   str_bytes=True):
    """Collect the image inputs to use and cap how many are returned.

    Source priority: ``image_control`` if not None, else ``image_file`` if not
    None, else the entries of ``document_choice`` whose name ends with one of
    the extensions from ``get_image_types()``.  A single value is wrapped in a
    list.

    With ``convert`` set, entries that are existing files are passed through
    ``img_to_base64`` (from ``vision.utils_vision``), other strings are kept
    as they are, and anything else is dropped.  Falsy entries are always
    removed from the result.

    Limit resolution for ``images_num_max``:

    * with ``base_model`` given and a value of -1 or None, the limit is looked
      up in ``images_num_max_dict`` (default 1);
    * if still None, all collected images are kept;
    * any value ``n <= -1`` that remains is mapped to ``-n - 1``.

    :return: list of at most ``images_num_max`` image entries.
    """
    if image_control is not None:
        candidates = image_control
    elif image_file is not None:
        candidates = image_file
    else:
        image_types = get_image_types()
        if document_choice:
            candidates = [x for x in document_choice if
                          any(x.endswith('.' + y) for y in image_types)]
        else:
            candidates = []

    if not isinstance(candidates, list):
        candidates = [candidates]
    if not candidates:
        # Keep a single placeholder so the loop below still runs once.
        candidates = [None]

    def _prepare(entry):
        # Without conversion every entry is passed through untouched.
        if not convert:
            return entry
        if entry and os.path.isfile(entry):
            from vision.utils_vision import img_to_base64
            return img_to_base64(entry, str_bytes=str_bytes, resolution=image_resolution,
                                 output_format=image_format)
        if isinstance(entry, str):
            # Non-file strings are kept as they are.
            return entry
        return None

    final_img_files = [prepared for prepared in map(_prepare, candidates) if prepared]

    if base_model:
        if images_num_max == -1:
            images_num_max = images_num_max_dict.get(base_model, 1)
        if images_num_max is None:
            images_num_max = images_num_max_dict.get(base_model, 1) or 1
    if images_num_max is None:
        images_num_max = len(final_img_files)
    if images_num_max <= -1:
        images_num_max = -images_num_max - 1

    return final_img_files[:images_num_max]
|
|