|
from PIL import ImageDraw |
|
import numpy as np |
|
import re |
|
|
|
|
|
|
|
# Palette cycled through when colouring detections (indexed modulo its length).
# NOTE(review): indigo appears twice in a row (indices 11 and 12) and cyan
# appears twice (indices 8 and 14), so some boxes share a colour — confirm
# whether the duplicates are intentional.
colormap = [
    "#0000FF", "#FFA500", "#008000", "#800080",  # blue, orange, green, purple
    "#A52A2A", "#FFC0CB", "#808080", "#808000",  # brown, pink, gray, olive
    "#00FFFF", "#FF0000", "#00FF00", "#4B0082",  # cyan, red, lime, indigo
    "#4B0082", "#EE82EE", "#00FFFF", "#FF00FF",  # indigo, violet, cyan, magenta
    "#FF7F50", "#FFD700", "#87CEEB",  # coral, gold, sky blue
]
|
|
|
|
|
|
|
def clean_text(text):
    """
    Normalize raw model output into a single, tidily spaced line of text.

    The ``<pad>`` and ``</s>`` tokens are removed, blank lines are dropped,
    the remaining lines are joined with single spaces, runs of whitespace are
    collapsed, and a space is inserted after every period that is directly
    followed by a non-whitespace character.

    Args:
        text (str): The input text to be cleaned.

    Returns:
        str: The cleaned and properly formatted text.
    """
    # Remove the special tokens one after the other, then trim the ends.
    for token in ("<pad>", "</s>"):
        text = text.replace(token, "")
    text = text.strip()

    # Keep only lines that still contain something after trimming.
    kept_lines = []
    for raw_line in text.split("\n"):
        trimmed = raw_line.strip()
        if trimmed:
            kept_lines.append(trimmed)

    single_line = " ".join(kept_lines)

    # Collapse any whitespace run into exactly one space.
    collapsed = re.sub(r"\s+", " ", single_line)

    # Zero-width match: the spot right after a "." that is followed by a
    # non-whitespace character; a space is inserted there.
    return re.sub(r"(?<=[.])(?=[^\s])", r" ", collapsed)
|
|
|
|
|
|
|
def hex_to_rgba(hex_color, alpha):
    """
    Turn a six-digit hexadecimal colour string into an RGBA tuple.

    Args:
        hex_color (str): The hexadecimal color code (e.g., "#FF0000"); any
            leading "#" characters are ignored.
        alpha (int): The alpha value for the RGBA color (0-255). It is passed
            through unchanged.

    Returns:
        tuple: A tuple representing the RGBA color values (red, green, blue, alpha).
    """
    digits = hex_color.lstrip("#")
    # Each channel is two hex digits, at offsets 0, 2 and 4.
    channels = [int(digits[offset:offset + 2], 16) for offset in (0, 2, 4)]
    return tuple(channels) + (alpha,)
|
|
|
|
|
|
|
def draw_ocr_bboxes(image, prediction):
    """
    Draw labelled quadrilateral outlines for an OCR prediction onto an image.

    Each box is outlined in a colour taken from ``colormap`` (cycled by box
    index), with arcs drawn at its vertices, and its label is rendered on a
    semi-transparent tag placed above the box. The image is drawn on in place.

    Args:
        image (PIL.Image.Image): The input image on which the bounding boxes will be drawn.
        prediction (dict): The OCR prediction containing 'quad_boxes' and 'labels'.
            Each quad box is indexed as a flat sequence of 8 numbers
            (x, y pairs for four vertices). If the two lists differ in length,
            the extra entries of the longer one are ignored.

    Returns:
        PIL.Image.Image: The image with the bounding boxes drawn (the same
        object that was passed in).
    """
    # "RGBA" drawing mode lets the translucent label background blend with
    # the underlying pixels.
    draw = ImageDraw.Draw(image, "RGBA")

    # Styling is identical for every box, so it is defined once.
    box_outline_width = 3
    corner_radius = 10
    label_padding = 5

    bboxes, labels = prediction["quad_boxes"], prediction["labels"]

    for i, (box, label) in enumerate(zip(bboxes, labels)):
        color = colormap[i % len(colormap)]
        # Convert to a plain Python list so coordinates are native numbers.
        new_box = np.array(box).tolist()

        # Walk the four edges: vertex j -> vertex (j + 1), wrapping around.
        for j in range(4):
            start_x, start_y = new_box[j * 2], new_box[j * 2 + 1]
            end_x, end_y = new_box[(j * 2 + 2) % 8], new_box[(j * 2 + 3) % 8]

            # Quarter-circle arcs centred on the edge's start and end vertices.
            draw.arc(
                [
                    (start_x - corner_radius, start_y - corner_radius),
                    (start_x + corner_radius, start_y + corner_radius),
                ],
                90 + j * 90,
                180 + j * 90,
                fill=color,
                width=box_outline_width,
            )
            draw.arc(
                [
                    (end_x - corner_radius, end_y - corner_radius),
                    (end_x + corner_radius, end_y + corner_radius),
                ],
                j * 90,
                90 + j * 90,
                fill=color,
                width=box_outline_width,
            )

            # Edge segment. Edges 0 and 2 are shortened along x by the corner
            # radius, edge 1 is drawn vertex to vertex, and edge 3 is
            # shortened along y.
            if j in [0, 1, 2]:
                draw.line(
                    [
                        (start_x + corner_radius if j != 1 else start_x, start_y),
                        (end_x - corner_radius if j != 1 else end_x, end_y),
                    ],
                    fill=color,
                    width=box_outline_width,
                )
            else:
                draw.line(
                    [
                        (start_x, start_y + corner_radius),
                        (end_x, end_y - corner_radius),
                    ],
                    fill=color,
                    width=box_outline_width,
                )

        # Label tag: anchored at the box's left-most x, 20 px above its top-most y.
        text_x, text_y = min(new_box[0::2]), min(new_box[1::2]) - 20
        text_origin = (text_x + label_padding, text_y + label_padding)

        # textlength() returns a single float (width only), so unpacking it
        # into width and height raises TypeError. textbbox() returns the full
        # (left, top, right, bottom) extent of the text as drawn at text_origin.
        _, _, text_right, text_bottom = draw.textbbox(text_origin, label)
        rgba_color = hex_to_rgba(color, 200)

        # Background rectangle extends label_padding beyond the text extent.
        draw.rectangle(
            [text_x, text_y, text_right + label_padding, text_bottom + label_padding],
            fill=rgba_color,
        )

        draw.text(text_origin, label, fill=(0, 0, 0, 255))

    return image
|
|