Spaces:

abugaber
/

aiben

Build error

App Files Files Community

aiben / src /image_utils.py

abugaber

Upload folder using huggingface_hub

3943768 verified 6 months ago

raw

history blame contribute delete

11.9 kB

	import os

	import numpy as np
	from scipy.stats import mode

	from utils import have_cv2, have_pillow
	from enums import images_num_max_dict


	def largest_contour(contours):
	    """Return the contour enclosing the greatest area.

	    Areas are measured with ``cv2.contourArea``.  When several contours tie
	    for the largest area, the earliest one wins.  ``None`` is returned when
	    ``contours`` is empty or when no contour has an area greater than zero.
	    """
	    import cv2

	    candidates = list(contours)
	    areas = [cv2.contourArea(candidate) for candidate in candidates]
	    if not areas:
	        return None

	    # max() yields the first index holding the maximum, matching a strict
	    # "greater than" scan from left to right.
	    winner = max(range(len(areas)), key=areas.__getitem__)
	    if areas[winner] > 0:
	        return candidates[winner]
	    return None


	def is_contour_acceptable(contour, image, size_threshold=0.1, aspect_ratio_range=(0.5, 2), rotation_threshold=30):
	    """Check if the contour is acceptable based on size, aspect ratio, and rotation.

	    Args:
	        contour: Contour as accepted by ``cv2.contourArea``.
	        image: Array whose first two ``shape`` entries are height and width.
	        size_threshold: The contour must cover more than this fraction of the
	            image and less than ``1 - size_threshold`` of it.
	        aspect_ratio_range: ``(min, max)`` bounds on the width/height ratio of
	            the contour's upright bounding box.
	        rotation_threshold: Largest tolerated tilt, in degrees, of the
	            minimum-area rectangle away from the nearest image axis.

	    Returns:
	        True when all three checks pass, otherwise False.
	    """
	    import cv2

	    # Size check
	    image_area = image.shape[0] * image.shape[1]
	    if image_area == 0:
	        # An empty image cannot contain a usable contour (and would divide by zero).
	        return False
	    area_fraction = cv2.contourArea(contour) / image_area
	    if area_fraction < size_threshold or area_fraction > 1 - size_threshold:
	        return False

	    # Aspect ratio check
	    _, _, w, h = cv2.boundingRect(contour)
	    if h == 0:
	        return False
	    aspect_ratio = w / h
	    if aspect_ratio < aspect_ratio_range[0] or aspect_ratio > aspect_ratio_range[1]:
	        return False

	    # Rotation check.  Depending on the OpenCV release, minAreaRect reports the
	    # angle in [-90, 0) or in (0, 90], so an axis-aligned box may come back as
	    # 90 (or -90).  Fold the angle onto its deviation from the nearest axis,
	    # which is in [0, 45], before comparing against the threshold.
	    _, _, angle = cv2.minAreaRect(contour)
	    tilt = abs(angle) % 90
	    tilt = min(tilt, 90 - tilt)
	    return bool(tilt <= rotation_threshold)


	def file_to_cv2(img_file):
	    """Load an image file as an OpenCV BGR array.

	    ``cv2.imread`` is tried first.  If it cannot decode the file (e.g. a small
	    black-and-white GIF), the file is opened with Pillow, converted to RGB and
	    turned into a BGR array in memory, so no intermediate file is written.

	    Args:
	        img_file: Path of the image to load.

	    Returns:
	        The image as a ``numpy`` array in BGR channel order.

	    Raises:
	        AssertionError: If ``img_file`` is not an existing file.  The check is
	            kept as an ``assert`` so the exception type stays what it was.
	    """
	    import cv2

	    # Validate before handing the path to OpenCV instead of after.
	    assert os.path.isfile(img_file), '%s not found' % img_file

	    image = cv2.imread(img_file)
	    if image is None:
	        # e.g. small BW gif gridnumbers.gif
	        from PIL import Image

	        # Context manager closes the underlying file handle promptly.
	        with Image.open(img_file) as pil_image:
	            rgb = np.array(pil_image.convert('RGB'))
	        # Pillow yields RGB; OpenCV code expects BGR.  copy() makes the
	        # reversed view contiguous.
	        image = rgb[:, :, ::-1].copy()
	    return image


	def align_image(img_file):
	    """Perspective-correct the dominant quadrilateral found in an image file.

	    The image is blurred, edge-detected and searched for contours.  If the
	    largest contour passes ``is_contour_acceptable`` and simplifies to four
	    corners, the region is warped with ``four_point_transform`` and written to
	    ``img_file + "_aligned.jpg"``.

	    Returns:
	        The path of the aligned image, or ``img_file`` unchanged when no
	        suitable quadrilateral is found or any processing step raises.
	    """
	    import cv2
	    from imutils.perspective import four_point_transform

	    try:
	        image = file_to_cv2(img_file)
	        smoothed = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (5, 5), 0)
	        edge_map = cv2.Canny(smoothed, 50, 150, apertureSize=3)
	        contours, _ = cv2.findContours(edge_map, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

	        candidate = largest_contour(contours)
	        if candidate is None or not is_contour_acceptable(candidate, image):
	            print("No acceptable contours found.")
	            return img_file

	        # Simplify the outline; tolerance is 2% of the closed perimeter.
	        perimeter = cv2.arcLength(candidate, True)
	        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
	        if len(polygon) != 4:
	            print("Contour is not a quadrilateral.")
	            return img_file

	        out_file = img_file + "_aligned.jpg"
	        cv2.imwrite(out_file, four_point_transform(image, polygon.reshape(4, 2)))
	        return out_file
	    except Exception as e:
	        # Best effort: alignment is optional, so fall back to the input file.
	        print("Error in align_image:", e, flush=True)
	        return img_file


	def correct_rotation(img_file, border_size=50):
	    """Rotate an image so its dominant line direction becomes axis-aligned.

	    Line segments are detected with a probabilistic Hough transform, the most
	    frequent (rounded) segment angle is taken as the skew, and the image is
	    rotated by that angle about its centre.  The result is written to
	    ``img_file + "_rotated.jpg"``.

	    Args:
	        img_file: Path of the image to correct.
	        border_size: Pixels to trim from every edge after rotation.  Only used
	            when the local ``remove_border_final`` switch is turned on; it is
	            off, so the value currently has no effect.

	    Returns:
	        Path of the rotated image, or ``img_file`` when no line segments are
	        detected.
	    """
	    import cv2

	    image = file_to_cv2(img_file)
	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

	    # Detect points that form a line using HoughLinesP
	    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80, minLineLength=100, maxLineGap=10)
	    if lines is None or len(lines) == 0:
	        return img_file

	    # Each entry holds one (x1, y1, x2, y2) segment; compute all angles at once.
	    segments = np.asarray(lines).reshape(-1, 4)
	    angles = np.degrees(np.arctan2(segments[:, 3] - segments[:, 1],
	                                   segments[:, 2] - segments[:, 0]))

	    # Depending on the SciPy version, .mode is a scalar or a 1-element array;
	    # normalise to a plain float either way.
	    most_frequent_angle = float(np.atleast_1d(mode(np.round(angles)).mode)[0])

	    # arctan2 yields angles in (-180, 180].  Step by 90 degrees until the value
	    # lies in [-45, 45] so the rotation is minimal.  A single +/-90 step is not
	    # enough for segments pointing "backwards" (e.g. 180 degrees).
	    while most_frequent_angle < -45:
	        most_frequent_angle += 90
	    while most_frequent_angle > 45:
	        most_frequent_angle -= 90

	    # Rotate the original image about its centre by the detected skew.
	    (h, w) = image.shape[:2]
	    center = (w // 2, h // 2)
	    M = cv2.getRotationMatrix2D(center, most_frequent_angle, 1.0)
	    corrected_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

	    # Optional crop of the replicated border; disabled by default.
	    remove_border_final = False
	    if remove_border_final:
	        cropped_rotated_image = corrected_image[border_size:-border_size, border_size:-border_size]
	    else:
	        cropped_rotated_image = corrected_image

	    out_file = img_file + "_rotated.jpg"
	    cv2.imwrite(out_file, cropped_rotated_image)
	    return out_file


	def pad_resize_image_file(img_file, relaxed_resize=False):
	    """Resize (or pad-resize) an image file and return the resulting path.

	    With ``relaxed_resize`` the image is only shrunk via ``resize_image``
	    (``max_dimension=2048``) and saved with the ``_resized.png`` suffix.
	    Otherwise ``pad_resize_image`` is applied and the suffix is
	    ``_pad_resized.png``.  When the helper reports no change (returns
	    ``None``), nothing is written and ``img_file`` itself is returned.
	    """
	    import cv2

	    source = file_to_cv2(img_file)
	    if relaxed_resize:
	        suffix = "_resized.png"
	        processed = resize_image(source, return_none_if_no_change=True, max_dimension=2048)
	    else:
	        suffix = "_pad_resized.png"
	        processed = pad_resize_image(source, return_none_if_no_change=True)

	    if processed is None:
	        return img_file

	    target = img_file + suffix
	    cv2.imwrite(target, processed)
	    return target


	def resize_image(image, return_none_if_no_change=True, max_dimension=2048):
	    """Shrink an image so its longer side is at most ``max_dimension``.

	    The aspect ratio is preserved.  Images already within the limit are never
	    enlarged.

	    Args:
	        image: Array whose first two ``shape`` entries are height and width.
	        return_none_if_no_change: When the image already fits, return ``None``
	            if True, otherwise return ``image`` itself.
	        max_dimension: Upper bound, in pixels, for the longer side.

	    Returns:
	        The resized image, or ``None`` / the original image as described above.
	    """
	    height, width = image.shape[:2]
	    longest_side = max(height, width)

	    if longest_side <= max_dimension:
	        # No resizing needed if the image is already within the desired dimensions
	        return None if return_none_if_no_change else image

	    # Imported only when a resize actually happens, so the no-op path does not
	    # require OpenCV.
	    import cv2

	    scale_factor = max_dimension / longest_side
	    # Clamp to 1 pixel: truncation could otherwise give a zero-sized dimension
	    # for extremely elongated images, which cv2.resize rejects.
	    new_dimensions = (max(1, int(width * scale_factor)), max(1, int(height * scale_factor)))
	    return cv2.resize(image, new_dimensions, interpolation=cv2.INTER_AREA)


	def pad_resize_image(image, return_none_if_no_change=False, max_dimension=1024):
	    """Fit an image to a ``max_dimension`` x ``max_dimension`` square.

	    Behaviour by input size:

	    * exactly the target size: nothing is done;
	    * smaller than the target in both dimensions: centred on a black canvas
	      of the target size, without scaling;
	    * larger than the target in both dimensions: scaled down, preserving the
	      aspect ratio, so it fits inside the target (no padding is added);
	    * anything else: scaled to fit inside the target, then padded with black
	      to the full target size.

	    Args:
	        image: Array whose first two ``shape`` entries are height and width.
	        return_none_if_no_change: When the image already has the target size,
	            return ``None`` if True, otherwise return ``image`` itself.
	        max_dimension: Side length, in pixels, of the square target.

	    Returns:
	        The processed image, or ``None`` / the original image as described.
	    """
	    target_w = max_dimension
	    target_h = max_dimension
	    src_h, src_w = image.shape[:2]

	    if src_w == target_w and src_h == target_h:
	        return None if return_none_if_no_change else image

	    # Imported only when pixels are actually touched, so the no-op path does
	    # not require OpenCV.
	    import cv2

	    if src_w < target_w and src_h < target_h:
	        return _pad_to_canvas(image, target_w, target_h)

	    # Scale so the image fits inside the target while keeping its proportions.
	    aspect_ratio_original = src_w / src_h
	    if aspect_ratio_original < target_w / target_h:
	        # The image is taller than the target aspect ratio
	        new_height = target_h
	        new_width = max(1, int(target_h * aspect_ratio_original))
	    else:
	        # The image is wider than the target aspect ratio
	        new_width = target_w
	        new_height = max(1, int(target_w / aspect_ratio_original))
	    resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

	    if src_w > target_w and src_h > target_h:
	        # Images larger in both dimensions are only resized, not padded.
	        return resized
	    return _pad_to_canvas(resized, target_w, target_h)


	def _pad_to_canvas(image, canvas_w, canvas_h):
	    """Centre ``image`` on a black canvas of exactly ``canvas_w`` x ``canvas_h``."""
	    import cv2

	    extra_w = canvas_w - image.shape[1]
	    extra_h = canvas_h - image.shape[0]
	    left = extra_w // 2
	    top = extra_h // 2
	    # Give the leftover pixel of an odd gap to the right/bottom edge so the
	    # result has exactly the requested size.
	    return cv2.copyMakeBorder(image, top, extra_h - top, left, extra_w - left, cv2.BORDER_CONSTANT,
	                              value=[0, 0, 0])


	def fix_image_file(file, do_align=False, do_rotate=False, do_pad=False, relaxed_resize=False):
	    """Run the requested clean-up steps on an image file and return the final path.

	    Nothing is done when OpenCV is unavailable (``have_cv2`` is falsy).
	    Otherwise alignment and rotation correction run in that order when their
	    flags are set; each result replaces ``file`` only if it names an existing
	    file.  Finally, if ``do_pad`` or ``relaxed_resize`` is set, the file goes
	    through ``pad_resize_image_file``.
	    """
	    if not have_cv2:
	        return file

	    optional_steps = (
	        (do_align, align_image),
	        (do_rotate, correct_rotation),
	    )
	    for enabled, step in optional_steps:
	        if not enabled:
	            continue
	        produced = step(file)
	        if produced is not None and os.path.isfile(produced):
	            file = produced

	    if do_pad or relaxed_resize:
	        file = pad_resize_image_file(file, relaxed_resize=relaxed_resize)
	    return file


	def get_image_types():
	    """List the file extensions Pillow can open, sorted and without the leading dot.

	    Returns an empty list when Pillow is unavailable (``have_pillow`` is falsy).
	    """
	    if not have_pillow:
	        return []

	    from PIL import Image

	    readable = set()
	    for extension, format_name in Image.registered_extensions().items():
	        # Keep only extensions whose format has a registered opener.
	        if format_name in Image.OPEN:
	            readable.add(extension)

	    return [ext[1:] if ext.startswith('.') else ext for ext in sorted(readable)]


	def get_image_file(image_file, image_control, document_choice, base_model=None, images_num_max=None,
	                   image_resolution=None, image_format=None,
	                   convert=False,
	                   str_bytes=True):
	    """Collect the images to use for a request and cap how many are returned.

	    Source precedence: ``image_control`` if not None, else ``image_file`` if
	    not None, else the entries of ``document_choice`` whose name ends with a
	    known image extension.  A single value is wrapped in a list.

	    With ``convert`` set, existing files are encoded via ``img_to_base64``,
	    other strings are passed through unchanged and anything else is dropped.
	    Falsy entries are always removed.

	    ``images_num_max`` handling:
	        * ``-1`` with a ``base_model``: looked up in ``images_num_max_dict``
	          (default 1);
	        * ``None`` with a ``base_model``: same lookup, falling back to 1;
	        * still ``None``: no limit;
	        * any value ``<= -1`` left at this point is mapped to
	          ``-value - 1``.

	    Returns:
	        List of at most ``images_num_max`` image entries.
	    """
	    if image_control is not None:
	        candidates = image_control
	    elif image_file is not None:
	        candidates = image_file
	    else:
	        known_types = get_image_types()
	        candidates = []
	        if document_choice:
	            candidates = [doc for doc in document_choice
	                          if any(doc.endswith('.' + ext) for ext in known_types)]

	    if not isinstance(candidates, list):
	        candidates = [candidates]
	    if not candidates:
	        candidates = [None]

	    def _prepare(entry):
	        # Optionally turn one entry into its base64 form.
	        if not convert:
	            return entry
	        if entry and os.path.isfile(entry):
	            from vision.utils_vision import img_to_base64
	            return img_to_base64(entry, str_bytes=str_bytes, resolution=image_resolution,
	                                 output_format=image_format)
	        if isinstance(entry, str):
	            # assume already bytes
	            return entry
	        return None

	    prepared = [item for item in map(_prepare, candidates) if item]

	    if base_model:
	        if images_num_max == -1:
	            images_num_max = images_num_max_dict.get(base_model, 1)
	        if images_num_max is None:
	            images_num_max = images_num_max_dict.get(base_model, 1) or 1
	    if images_num_max is None:
	        images_num_max = len(prepared)
	    if images_num_max <= -1:
	        images_num_max = -images_num_max - 1

	    return prepared[:images_num_max]