sitammeur committed on
Commit
c1cef3f
1 Parent(s): ae5368a

Update utils functions

Browse files
Files changed (1) hide show
  1. src/utils.py +32 -126
src/utils.py CHANGED
@@ -1,30 +1,7 @@
1
- from PIL import ImageDraw
2
- import numpy as np
3
  import re
4
-
5
-
6
- # Use a color map for bounding boxes
7
- colormap = [
8
- "#0000FF",
9
- "#FFA500",
10
- "#008000",
11
- "#800080",
12
- "#A52A2A",
13
- "#FFC0CB",
14
- "#808080",
15
- "#808000",
16
- "#00FFFF",
17
- "#FF0000",
18
- "#00FF00",
19
- "#4B0082",
20
- "#4B0082",
21
- "#EE82EE",
22
- "#00FFFF",
23
- "#FF00FF",
24
- "#FF7F50",
25
- "#FFD700",
26
- "#87CEEB",
27
- ]
28
 
29
 
30
  # Text cleaning function
@@ -62,111 +39,40 @@ def clean_text(text):
62
  return cleaned_text
63
 
64
 
65
- # Convert hex color to RGBA with the given alpha
66
- def hex_to_rgba(hex_color, alpha):
67
- """
68
- Convert a hexadecimal color code to RGBA format.
69
-
70
- Args:
71
- hex_color (str): The hexadecimal color code (e.g., "#FF0000").
72
- alpha (int): The alpha value for the RGBA color (0-255).
73
-
74
- Returns:
75
- tuple: A tuple representing the RGBA color values (red, green, blue, alpha).
76
- """
77
- hex_color = hex_color.lstrip("#")
78
- r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
79
- return (r, g, b, alpha)
80
-
81
-
82
  # Draw OCR bounding boxes with enhanced visual elements
83
- def draw_ocr_bboxes(image, prediction):
84
  """
85
- Draw bounding boxes with enhanced visual elements on the given image based on the OCR prediction.
86
 
87
  Args:
88
- image (PIL.Image.Image): The input image on which the bounding boxes will be drawn.
89
- prediction (dict): The OCR prediction containing 'quad_boxes' and 'labels'.
90
 
91
  Returns:
92
- PIL.Image.Image: The image with the bounding boxes drawn.
 
93
  """
94
 
95
- # Create a drawing object for the image with RGBA mode
96
- draw = ImageDraw.Draw(image, "RGBA")
97
-
98
- # Extract bounding boxes and labels from the prediction
99
- bboxes, labels = prediction["quad_boxes"], prediction["labels"]
100
-
101
- for i, (box, label) in enumerate(zip(bboxes, labels)):
102
- # Select color for the bounding box and label
103
- color = colormap[i % len(colormap)]
104
- new_box = (np.array(box)).tolist()
105
-
106
- # Define the outline width and corner radius for the bounding box
107
- box_outline_width = 3
108
- corner_radius = 10
109
-
110
- # Draw rounded corners for the bounding box
111
- for j in range(4):
112
- start_x, start_y = new_box[j * 2], new_box[j * 2 + 1]
113
- end_x, end_y = new_box[(j * 2 + 2) % 8], new_box[(j * 2 + 3) % 8]
114
-
115
- # Draw the arcs for the rounded corners
116
- draw.arc(
117
- [
118
- (start_x - corner_radius, start_y - corner_radius),
119
- (start_x + corner_radius, start_y + corner_radius),
120
- ],
121
- 90 + j * 90,
122
- 180 + j * 90,
123
- fill=color,
124
- width=box_outline_width,
125
- )
126
- draw.arc(
127
- [
128
- (end_x - corner_radius, end_y - corner_radius),
129
- (end_x + corner_radius, end_y + corner_radius),
130
- ],
131
- j * 90,
132
- 90 + j * 90,
133
- fill=color,
134
- width=box_outline_width,
135
- )
136
-
137
- # Draw the lines connecting the arcs
138
- if j in [0, 1, 2]:
139
- draw.line(
140
- [
141
- (start_x + corner_radius if j != 1 else start_x, start_y),
142
- (end_x - corner_radius if j != 1 else end_x, end_y),
143
- ],
144
- fill=color,
145
- width=box_outline_width,
146
- )
147
- else:
148
- draw.line(
149
- [
150
- (start_x, start_y + corner_radius),
151
- (end_x, end_y - corner_radius),
152
- ],
153
- fill=color,
154
- width=box_outline_width,
155
- )
156
-
157
- # Calculate the position for the text label
158
- text_x, text_y = min(new_box[0::2]), min(new_box[1::2]) - 20
159
- text_w, text_h = draw.textsize(label)
160
- rgba_color = hex_to_rgba(color, 200) # Semi-transparent background for text
161
-
162
- # Draw the background rectangle for the text
163
- draw.rectangle(
164
- [text_x, text_y, text_x + text_w + 10, text_y + text_h + 10],
165
- fill=rgba_color,
166
- )
167
-
168
- # Draw the text label
169
- draw.text((text_x + 5, text_y + 5), label, fill=(0, 0, 0, 255))
170
-
171
- # Return the image with the OCR boxes drawn
172
- return image
 
1
+ # Necessary imports
 
2
  import re
3
+ import supervision as sv
4
+ from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
  # Text cleaning function
 
39
  return cleaned_text
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Draw OCR bounding boxes with enhanced visual elements
43
+ def draw_ocr_bboxes(image: Image, detections: sv.Detections) -> Image:
44
  """
45
+ Draws bounding boxes and labels on the input image based on the OCR detections.
46
 
47
  Args:
48
+ image (PIL.Image): The input image on which to draw the bounding boxes and labels.
49
+ detections (sv.Detections): The OCR detections containing the bounding box coordinates and labels.
50
 
51
  Returns:
52
+ PIL.Image: The annotated image with bounding boxes and labels.
53
+
54
  """
55
 
56
+ # Copy the input image to avoid modifying the original image
57
+ annotated_image = image.copy()
58
+
59
+ # Calculate the optimal line thickness and text scale based on the image resolution
60
+ thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
61
+ text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
62
+
63
+ # Initialize the bounding box and label annotators
64
+ bounding_box_annotator = sv.BoundingBoxAnnotator(
65
+ color_lookup=sv.ColorLookup.INDEX, thickness=thickness
66
+ )
67
+ label_annotator = sv.LabelAnnotator(
68
+ color_lookup=sv.ColorLookup.INDEX,
69
+ text_scale=text_scale,
70
+ text_thickness=thickness,
71
+ )
72
+
73
+ # Annotate the image with bounding boxes and labels
74
+ annotated_image = bounding_box_annotator.annotate(annotated_image, detections)
75
+ annotated_image = label_annotator.annotate(annotated_image, detections)
76
+
77
+ # Return the annotated image
78
+ return annotated_image