import cv2
from PIL import Image
import numpy as np
import time
import math
import gradio as gr
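
# This app flags likely signature regions with classical OpenCV operations
# (Otsu thresholding + connected-component analysis); no deep learning model is involved.
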
def find_signature_bounding_boxes(image):
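    """Locate likely signature regions in a document image.

    The page is binarized with Otsu's method and split into connected
    components. Components are kept when their area lies between 4x and 50x
    the median component area (clearly larger than typical characters but
    smaller than large graphics), long horizontal/vertical lines and dense
    blobs such as logos are discarded, and nearby candidates are merged.

    Args:
        image: page as a NumPy array (e.g. from cv2.imread or a PIL image).

    Returns:
        List of (left, top, width, height) tuples for candidate signatures.
    """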
    # Start measuring time
    start_time = time.time()
    if image is None:
        raise ValueError("Could not open or find the image")
    # Binarize the image: convert to grayscale, then apply Otsu's thresholding
    # (THRESH_BINARY_INV makes the ink pixels non-zero)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary_image = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # Find connected components
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=8, ltype=cv2.CV_32S)
    # Calculate median area of components
    areas = stats[1:, cv2.CC_STAT_AREA]  # Exclude background
    median_area = np.median(areas)
    print('median_area: ' + str(median_area))
    median_character_width = int(math.sqrt(median_area))
    print('median_character_width: ' + str(median_character_width))
    # Define area thresholds
    min_area_threshold = median_area * 4
    max_area_threshold = median_area * 50
    # Filter components based on area thresholds
    possible_signatures = []
    for i in range(1, num_labels):  # Exclude background
        area = stats[i, cv2.CC_STAT_AREA]
        if min_area_threshold < area < max_area_threshold:
            left = stats[i, cv2.CC_STAT_LEFT]
            top = stats[i, cv2.CC_STAT_TOP]
            width = stats[i, cv2.CC_STAT_WIDTH]
            height = stats[i, cv2.CC_STAT_HEIGHT]
            print('Found candidate with area: ' + str(area))
            # Filter horizontal lines
            if height < median_character_width * 5 and width > median_character_width * 30:
                print(' -> candidate is horizontal line with width, height: ' + str(width) + ',' + str(height))
                continue
            # Filter vertical lines
            if width < median_character_width * 5 and height > median_character_width * 30:
                print(' -> candidate is vertical line with width, height: ' + str(width) + ',' + str(height))
                continue
            # Filter on the ratio of black (ink) pixels; dense regions such as logos
            # have a higher ratio. For now the cut-off is a guesstimate of 0.3.
            roi = binary_image[top:top+height, left:left+width]
            num_black_pixels = cv2.countNonZero(roi)  # Ink pixels are non-zero in the inverted binary image
            total_pixels = width * height  # Total number of pixels in the ROI
            ratio = num_black_pixels / total_pixels  # Ratio of ink pixels in the ROI
            print(' -> candidate has black pixel ratio: ' + str(ratio))
            if ratio > 0.30:
                print(' -> candidate has too high black pixel ratio')
                continue
            possible_signatures.append((left, top, width, height))
    print('Nr of signatures found before merging: ' + str(len(possible_signatures)))
    possible_signatures = merge_nearby_rectangles(possible_signatures, nearness=median_character_width * 4)
    # End measuring time
    end_time = time.time()
    print(f"Function took {end_time - start_time:.2f} seconds to process the image.")
    return possible_signatures

def merge_nearby_rectangles(rectangles, nearness):
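    """Merge rectangles that overlap or lie within `nearness` pixels of each other.

    Rectangles are (left, top, width, height) tuples; near rectangles are
    replaced by their combined bounding box in a single greedy pass.
    """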
    def is_near(rect1, rect2):
        left1, top1, width1, height1 = rect1
        left2, top2, width2, height2 = rect2
        right1, bottom1 = left1 + width1, top1 + height1
        right2, bottom2 = left2 + width2, top2 + height2
        return not (right1 < left2 - nearness or left1 > right2 + nearness or
                    bottom1 < top2 - nearness or top1 > bottom2 + nearness)

    def merge(rect1, rect2):
        left1, top1, width1, height1 = rect1
        left2, top2, width2, height2 = rect2
        right1, bottom1 = left1 + width1, top1 + height1
        right2, bottom2 = left2 + width2, top2 + height2
        min_left = min(left1, left2)
        min_top = min(top1, top2)
        max_right = max(right1, right2)
        max_bottom = max(bottom1, bottom2)
        return (min_left, min_top, max_right - min_left, max_bottom - min_top)

    merged = []
    while rectangles:
        current = rectangles.pop(0)
        has_merged = False
        for i, other in enumerate(merged):
            if is_near(current, other):
                merged[i] = merge(current, other)
                has_merged = True
                break
        if not has_merged:
            for i in range(len(rectangles) - 1, -1, -1):
                if is_near(current, rectangles[i]):
                    current = merge(current, rectangles.pop(i))
        if not has_merged:
            merged.append(current)
    return merged
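
# Example with hypothetical coordinates: two boxes 10 px apart are, with
# nearness=15, collapsed into a single bounding box:
#   merge_nearby_rectangles([(0, 0, 50, 20), (60, 0, 50, 20)], nearness=15)
#   -> [(0, 0, 110, 20)]
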
def run_detection(input_image):
    """Gradio callback: detect signatures and draw green boxes on the input page."""
    # Leftover from the original Grounding DINO demo, kept for reference but not used here:
    # init_image = input_image.convert("RGB")
    # original_size = init_image.size
    # _, image_tensor = image_transform_grounding(init_image)
    # image_pil: Image = image_transform_grounding_for_vis(init_image)
    # # run grounding
    # boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
    # annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
    # image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))

    # input_image is a PIL image in RGB; np.array gives a writable copy that cv2 can draw on
    image = np.array(input_image.convert("RGB"))
    # Find bounding boxes of possible signatures on the document
    signatures = find_signature_bounding_boxes(image)
    print('Nr of signatures found: ' + str(len(signatures)))
    # Draw bounding boxes on the image
    for (x, y, w, h) in signatures:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    # The array is already RGB, so it can be converted back to PIL directly
    image_with_box = Image.fromarray(image)
    return image_with_box
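
# A minimal sketch of calling the detector outside the Gradio UI; "document.jpg"
# is only a placeholder file name.
#
#   page = cv2.imread("document.jpg")
#   for (x, y, w, h) in find_signature_bounding_boxes(page):
#       print(f"signature candidate at x={x}, y={y}, size {w}x{h}")
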
if __name__ == "__main__":
css = """
#mkd {
height: 500px;
overflow: auto;
border: 1px solid #ccc;
}
"""
block = gr.Blocks(css=css).queue()
with block:
gr.Markdown("<h1><center>Signature detection<h1><center>")
gr.Markdown("<h3><center>See article<a href='https://github.com/IDEA-Research/GroundingDINO'>Grounding DINO</a><h3><center>")
gr.Markdown("<h3><center>Serves as an example where deep learning is not needed.<h3><center>")
with gr.Row():
with gr.Column():
input_image = gr.Image(source='upload', type="pil")
grounding_caption = gr.Textbox(label="Detection Prompt")
run_button = gr.Button(label="Run")
with gr.Accordion("Advanced options", open=False):
box_threshold = gr.Slider(
label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
)
text_threshold = gr.Slider(
label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
)
with gr.Column():
gallery = gr.outputs.Image(
type="pil",
# label="grounding results"
).style(full_width=True, full_height=True)
# gallery = gr.Gallery(label="Generated images", show_label=False).style(
# grid=[1], height="auto", container=True, full_width=True, full_height=True)
run_button.click(fn=run_detection, inputs=[
input_image], outputs=[gallery])
gr.Examples(
[["Sample1.jpg", "coffee cup"],["Sample2.jpg", "coffee cup"]],
inputs = [input_image],
outputs = [gallery],
fn=run_detection,
cache_examples=True,
label='Try this example input!'
)
block.launch(share=False, show_api=False, show_error=True) |