import os |
import numpy as np |
from scipy.stats import mode |
from utils import have_cv2, have_pillow |
from enums import images_num_max_dict |
def largest_contour(contours):
    """Return the contour with the greatest area.

    Areas are measured with ``cv2.contourArea``.  When several contours share
    the greatest area the earliest one wins.  ``None`` is returned when
    ``contours`` is empty or when no contour has an area strictly above zero.
    """
    import cv2

    scored = [(cv2.contourArea(candidate), candidate) for candidate in contours]
    # ``max`` keeps the first maximal element, matching a strict ``>`` scan.
    top_area, top_contour = max(scored, key=lambda pair: pair[0], default=(0, None))
    # Only a contour that beats the initial area of 0 counts as a result.
    return top_contour if top_area > 0 else None
def is_contour_acceptable(contour, image, size_threshold=0.1, aspect_ratio_range=(0.5, 2), rotation_threshold=30):
    """Check if the contour is acceptable based on size, aspect ratio, and rotation.

    Args:
        contour: contour as accepted by ``cv2.contourArea`` / ``cv2.boundingRect``.
        image: array whose first two ``shape`` entries are height and width.
        size_threshold: the contour area, as a fraction of the image area, must
            lie within ``[size_threshold, 1 - size_threshold]``.
        aspect_ratio_range: inclusive ``(min, max)`` bounds for the width/height
            ratio of the contour's upright bounding rectangle.
        rotation_threshold: maximum tolerated skew, in degrees, of the
            minimum-area rectangle relative to the nearest image axis.

    Returns:
        ``True`` when all three checks pass, ``False`` otherwise (including for
        a zero-area image or a zero-height bounding box).
    """
    image_area = image.shape[0] * image.shape[1]
    if image_area == 0:
        # Nothing can be a meaningful fraction of an empty image.
        return False

    import cv2

    area_fraction = cv2.contourArea(contour) / image_area
    if area_fraction < size_threshold or area_fraction > 1 - size_threshold:
        return False

    _, _, w, h = cv2.boundingRect(contour)
    if h == 0:
        return False
    aspect_ratio = w / h
    if aspect_ratio < aspect_ratio_range[0] or aspect_ratio > aspect_ratio_range[1]:
        return False

    _, _, angle = cv2.minAreaRect(contour)
    # The angle range reported by minAreaRect differs between OpenCV releases
    # ([-90, 0) vs (0, 90]); an axis-aligned rectangle may be reported as 90.
    # Fold the angle into the deviation from the nearest axis (0..45 degrees)
    # so the threshold measures actual skew under either convention.
    skew = abs(angle) % 90
    skew = min(skew, 90 - skew)
    return skew <= rotation_threshold
def file_to_cv2(img_file):
    """Load an image file as an OpenCV array.

    The file is read with ``cv2.imread``.  If OpenCV cannot decode it, the file
    is opened with Pillow, converted to RGB, written next to the original as
    ``<img_file>.pil.png`` and that PNG is read with OpenCV instead.

    Args:
        img_file: path of the image file to load.

    Returns:
        The array returned by ``cv2.imread``.

    Raises:
        AssertionError: if ``img_file`` is not an existing file.
        ValueError: if the Pillow-converted PNG cannot be read either.
    """
    # Validate before touching OpenCV.  The exception type is kept for
    # backward compatibility, but it is raised explicitly so the check is not
    # stripped when Python runs with -O.
    if not os.path.isfile(img_file):
        raise AssertionError('%s not found' % img_file)

    import cv2

    image = cv2.imread(img_file)
    if image is None:
        from PIL import Image

        pil_image_file = img_file + '.pil.png'
        # Context manager closes the source file handle once the copy is saved.
        with Image.open(img_file) as pil_image:
            pil_image.convert('RGB').save(pil_image_file)
        image = cv2.imread(pil_image_file)
        if image is None:
            raise ValueError("Error: Image for %s not made." % img_file)
    return image
def align_image(img_file):
    """Perspective-align the dominant quadrilateral found in an image file.

    The image is converted to grayscale, blurred and edge-detected; the
    largest contour is then validated with ``is_contour_acceptable`` and
    approximated by a polygon.  If that polygon has four corners, the image is
    warped with ``four_point_transform`` and written to
    ``<img_file>_aligned.jpg``, whose path is returned.

    In every other case (no acceptable contour, non-quadrilateral contour, or
    any exception along the way) a message is printed and the original
    ``img_file`` path is returned.
    """
    import cv2
    from imutils.perspective import four_point_transform

    try:
        source = file_to_cv2(img_file)
        grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
        smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
        edge_map = cv2.Canny(smoothed, 50, 150, apertureSize=3)
        found, _ = cv2.findContours(edge_map, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

        candidate = largest_contour(found)
        if candidate is None or not is_contour_acceptable(candidate, source):
            print("No acceptable contours found.")
            return img_file

        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) != 4:
            print("Contour is not a quadrilateral.")
            return img_file

        straightened = four_point_transform(source, polygon.reshape(4, 2))
        aligned_path = img_file + "_aligned.jpg"
        cv2.imwrite(aligned_path, straightened)
        return aligned_path
    except Exception as e:
        # Best-effort step: report the problem and fall back to the input file.
        print("Error in align_image:", e, flush=True)
        return img_file
def correct_rotation(img_file, border_size=50, remove_border_final=False):
    """Deskew an image file using the dominant direction of its straight lines.

    Line segments are detected with a probabilistic Hough transform on the
    Canny edges of the image.  Their angles are rounded to whole degrees, the
    most frequent one is folded into ``[-45, 45]`` and the image is rotated by
    that amount about its center (replicating border pixels).

    Args:
        img_file: path of the image to deskew.
        border_size: number of pixels trimmed from every side of the rotated
            image when ``remove_border_final`` is true.
        remove_border_final: crop ``border_size`` pixels from each side after
            rotating.  Defaults to ``False`` (no crop).  The crop is skipped
            when it would leave an empty image.

    Returns:
        ``<img_file>_rotated.jpg`` after writing the rotated image, or
        ``img_file`` unchanged when no line segments were detected.
    """
    import cv2

    image = file_to_cv2(img_file)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80, minLineLength=100, maxLineGap=10)
    if lines is None or len(lines) == 0:
        return img_file

    # Each detected segment carries (x1, y1, x2, y2); flatten to one row each.
    segments = np.asarray(lines).reshape(-1, 4)
    angles = np.degrees(np.arctan2(segments[:, 3] - segments[:, 1],
                                   segments[:, 2] - segments[:, 0]))

    # scipy.stats.mode returns a 1-element array or a scalar depending on the
    # SciPy version; normalise to a plain float either way.
    most_frequent_angle = float(np.ravel(mode(np.round(angles)).mode)[0])

    # Fold into [-45, 45].  arctan2 can yield up to +/-180 degrees, so a single
    # 90 degree step is not always enough.
    while most_frequent_angle < -45:
        most_frequent_angle += 90
    while most_frequent_angle > 45:
        most_frequent_angle -= 90

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    rotation = cv2.getRotationMatrix2D(center, most_frequent_angle, 1.0)
    corrected_image = cv2.warpAffine(image, rotation, (w, h), flags=cv2.INTER_CUBIC,
                                     borderMode=cv2.BORDER_REPLICATE)

    # Guard the crop: a zero border would slice [0:-0] (empty), and a border
    # of at least half the image size would leave nothing to write.
    if remove_border_final and 0 < 2 * border_size < min(h, w):
        corrected_image = corrected_image[border_size:-border_size, border_size:-border_size]

    out_file = img_file + "_rotated.jpg"
    cv2.imwrite(out_file, corrected_image)
    return out_file
def pad_resize_image_file(img_file, relaxed_resize=False):
    """Resize (and optionally pad) an image file, writing the result as a PNG.

    With ``relaxed_resize`` the image goes through ``resize_image`` with a
    2048 pixel limit and the output suffix is ``_resized.png``; otherwise it
    goes through ``pad_resize_image`` and the suffix is ``_pad_resized.png``.

    Returns:
        The path of the written file, or ``img_file`` itself when the helper
        reported that no change was needed (returned ``None``).
    """
    import cv2

    loaded = file_to_cv2(img_file)
    if relaxed_resize:
        suffix = "_resized.png"
        processed = resize_image(loaded, return_none_if_no_change=True, max_dimension=2048)
    else:
        suffix = "_pad_resized.png"
        processed = pad_resize_image(loaded, return_none_if_no_change=True)

    if processed is None:
        # Nothing changed, so keep pointing at the input file.
        return img_file

    target_path = img_file + suffix
    cv2.imwrite(target_path, processed)
    return target_path
def resize_image(image, return_none_if_no_change=True, max_dimension=2048):
    """Shrink an image so that its longer side is at most ``max_dimension``.

    The aspect ratio is preserved and images that already fit are never
    enlarged.

    Args:
        image: array whose first two ``shape`` entries are height and width.
        return_none_if_no_change: when the image already fits, return ``None``
            instead of the unchanged image.
        max_dimension: maximum allowed size, in pixels, of the longer side.

    Returns:
        The resized image; or, when no resize is needed, ``None`` or the
        original ``image`` depending on ``return_none_if_no_change``.
    """
    height, width = image.shape[:2]
    longest = max(height, width)
    if longest <= max_dimension:
        return None if return_none_if_no_change else image

    # OpenCV is only needed when a resize actually happens.
    import cv2

    # Integer arithmetic makes the longer side exactly ``max_dimension``
    # (no float rounding loss); clamping keeps the shorter side at least one
    # pixel for extreme aspect ratios, where a zero size would make
    # cv2.resize fail.
    new_width = max(1, width * max_dimension // longest)
    new_height = max(1, height * max_dimension // longest)
    return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
def pad_resize_image(image, return_none_if_no_change=False, max_dimension=1024):
    """Fit an image into a ``max_dimension`` x ``max_dimension`` frame.

    Behavior depends on the input size (height and width are the first two
    entries of ``image.shape``):

    * exactly ``max_dimension`` on both sides: returned unchanged, or ``None``
      when ``return_none_if_no_change`` is set;
    * smaller on both sides: centered on a black canvas without scaling;
    * larger on both sides: scaled down preserving the aspect ratio and NOT
      padded, so the result is only square when the input was;
    * anything else: scaled to fit preserving the aspect ratio, then centered
      on a black canvas.

    Whenever padding is applied the result is exactly ``max_dimension`` on
    both sides; an odd size difference puts the extra pixel on the
    bottom/right.

    Args:
        image: array whose first two ``shape`` entries are height and width.
        return_none_if_no_change: return ``None`` instead of the input when the
            image already has the target size.
        max_dimension: side length, in pixels, of the target square.

    Returns:
        The padded and/or resized image, the unchanged input, or ``None``.
    """
    canvas_w = canvas_h = max_dimension
    src_h, src_w = image.shape[0], image.shape[1]

    if src_w == canvas_w and src_h == canvas_h:
        return None if return_none_if_no_change else image

    if src_w < canvas_w and src_h < canvas_h:
        # Small images are only padded, never upscaled.
        return _pad_to_canvas(image, canvas_w, canvas_h)

    # OpenCV is only needed once a resize is required.
    import cv2

    shrink_only = src_w > canvas_w and src_h > canvas_h
    # Same test as comparing width/height against the canvas aspect ratio,
    # done in integers to avoid float rounding.
    if src_w * canvas_h < canvas_w * src_h:
        new_h = canvas_h
        new_w = max(1, src_w * canvas_h // src_h)
    else:
        new_w = canvas_w
        new_h = max(1, src_h * canvas_w // src_w)
    image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

    if shrink_only:
        # Existing behavior: images larger on both sides are not padded.
        return image
    return _pad_to_canvas(image, canvas_w, canvas_h)


def _pad_to_canvas(image, canvas_w, canvas_h):
    """Center ``image`` on a zero-filled canvas of exactly ``canvas_w`` x ``canvas_h``."""
    extra_x = canvas_w - image.shape[1]
    extra_y = canvas_h - image.shape[0]
    left, top = extra_x // 2, extra_y // 2
    # Compute the far side as the remainder, so an odd difference still adds
    # up to the full canvas size instead of falling one pixel short.
    pad_spec = [(top, extra_y - top), (left, extra_x - left)]
    # Leave any trailing (channel) axes untouched.
    pad_spec += [(0, 0)] * (image.ndim - 2)
    return np.pad(image, pad_spec, mode="constant", constant_values=0)
def fix_image_file(file, do_align=False, do_rotate=False, do_pad=False, relaxed_resize=False):
    """Run the requested clean-up steps on an image file and return the resulting path.

    Steps run in a fixed order, each working on the output of the previous
    one: alignment (``do_align``), rotation correction (``do_rotate``), then
    resizing when either ``do_pad`` or ``relaxed_resize`` is set.  The result
    of alignment or rotation is only adopted when it names an existing file.
    When ``have_cv2`` is falsy, ``file`` is returned untouched.
    """
    if not have_cv2:
        return file

    if do_align:
        aligned_path = align_image(file)
        if aligned_path is not None and os.path.isfile(aligned_path):
            file = aligned_path

    if do_rotate:
        rotated_path = correct_rotation(file)
        if rotated_path is not None and os.path.isfile(rotated_path):
            file = rotated_path

    if do_pad or relaxed_resize:
        file = pad_resize_image_file(file, relaxed_resize=relaxed_resize)

    return file
def get_image_types():
    """List the file extensions that Pillow can open, without the leading dot.

    The extensions come from ``Image.registered_extensions()``, keeping only
    those whose format is present in ``Image.OPEN``.  They are sorted before a
    single leading ``.`` is stripped.  An empty list is returned when
    ``have_pillow`` is falsy.
    """
    if not have_pillow:
        return []

    from PIL import Image

    registered = Image.registered_extensions()
    openable = {extension for extension, fmt in registered.items() if fmt in Image.OPEN}

    image_types = []
    for extension in sorted(openable):
        image_types.append(extension[1:] if extension.startswith('.') else extension)
    return image_types
def get_image_file(image_file, image_control, document_choice, base_model=None, images_num_max=None,
                   image_resolution=None, image_format=None,
                   convert=False,
                   str_bytes=True):
    """Collect the image inputs for a request and cap how many are returned.

    Source precedence: ``image_control`` if not ``None``, else ``image_file``
    if not ``None``, else the entries of ``document_choice`` that end in one
    of the extensions from ``get_image_types()``.  A single value is wrapped
    in a list.

    With ``convert`` set, entries naming an existing file are passed through
    ``img_to_base64``, other strings are kept as they are and everything else
    is dropped.  Falsy entries are always removed.

    Limit handling for ``images_num_max``:

    * with a ``base_model``, ``-1`` or ``None`` is looked up in
      ``images_num_max_dict`` (falling back to 1);
    * a remaining ``None`` means "no limit";
    * a remaining value ``<= -1`` is mapped to ``-value - 1``.

    Returns:
        A list with at most the resolved number of image entries.
    """
    if image_control is not None:
        candidates = image_control
    elif image_file is not None:
        candidates = image_file
    else:
        known_extensions = get_image_types()
        candidates = []
        if document_choice:
            candidates = [doc for doc in document_choice
                          if any(doc.endswith('.' + ext) for ext in known_extensions)]

    if not isinstance(candidates, list):
        candidates = [candidates]

    def _prepare(entry):
        """Convert one entry per the ``convert`` rules; ``None`` means drop it."""
        if not convert:
            return entry
        if entry and os.path.isfile(entry):
            from vision.utils_vision import img_to_base64
            return img_to_base64(entry, str_bytes=str_bytes, resolution=image_resolution,
                                 output_format=image_format)
        return entry if isinstance(entry, str) else None

    # Falsy results (None, empty strings, ...) never reach the caller.
    prepared = [item for item in (_prepare(entry) for entry in candidates) if item]

    limit = images_num_max
    if base_model:
        if limit == -1:
            limit = images_num_max_dict.get(base_model, 1)
        if limit is None:
            limit = images_num_max_dict.get(base_model, 1) or 1
    if limit is None:
        limit = len(prepared)
    if limit <= -1:
        limit = -limit - 1

    return prepared[:limit]