Spaces:
Sleeping
Sleeping
Commit
·
ce1d008
1
Parent(s):
2b5ccbe
Update order_text_blocks.py
Browse files- order_text_blocks.py +10 -6
order_text_blocks.py
CHANGED
@@ -84,16 +84,20 @@ def get_hierarchy(labels):
|
|
84 |
def textblock_ordering(textblocks, img):
|
85 |
coords = []
|
86 |
ori_coords = {}
|
|
|
87 |
for obj in textblocks:
|
88 |
x1, y1, x2, y2 = map(float, obj)
|
89 |
ori_coords[tuple([x1, y1, x2, y2])] = [x1, y1, x2, y2]
|
90 |
coords.append([x1, y1, x2, y2])
|
|
|
|
|
|
|
91 |
|
92 |
-
# sort the textblocks by
|
93 |
-
coords.sort(key = lambda x: x[
|
94 |
-
# create vertical buckets
|
95 |
buckets = []
|
96 |
-
bucket_size = int(0.
|
97 |
# put the textblocks in the buckets
|
98 |
for coord in coords:
|
99 |
if len(buckets) == 0:
|
@@ -106,9 +110,9 @@ def textblock_ordering(textblocks, img):
|
|
106 |
else:
|
107 |
buckets.append([coord])
|
108 |
|
109 |
-
# sort each bucket by
|
110 |
for bucket in buckets:
|
111 |
-
bucket.sort(key = lambda x: x[
|
112 |
|
113 |
# visualize the buckets one by one each with a different color
|
114 |
# for bucket in buckets:
|
|
|
84 |
def textblock_ordering(textblocks, img):
|
85 |
coords = []
|
86 |
ori_coords = {}
|
87 |
+
avg_width = 0
|
88 |
for obj in textblocks:
|
89 |
x1, y1, x2, y2 = map(float, obj)
|
90 |
ori_coords[tuple([x1, y1, x2, y2])] = [x1, y1, x2, y2]
|
91 |
coords.append([x1, y1, x2, y2])
|
92 |
+
avg_width += x2 - x1
|
93 |
+
|
94 |
+
avg_width /= len(coords)
|
95 |
|
96 |
+
# sort the textblocks by left x coordinate
|
97 |
+
coords.sort(key = lambda x: x[0])
|
98 |
+
# create vertical buckets based on avg. width of text blocks
|
99 |
buckets = []
|
100 |
+
bucket_size = int(0.55 * avg_width)
|
101 |
# put the textblocks in the buckets
|
102 |
for coord in coords:
|
103 |
if len(buckets) == 0:
|
|
|
110 |
else:
|
111 |
buckets.append([coord])
|
112 |
|
113 |
+
# sort each bucket by y1
|
114 |
for bucket in buckets:
|
115 |
+
bucket.sort(key = lambda x: x[1])
|
116 |
|
117 |
# visualize the buckets one by one each with a different color
|
118 |
# for bucket in buckets:
|