pantatwiai committed on
Commit
ce1d008
·
1 Parent(s): 2b5ccbe

Update order_text_blocks.py

Browse files
Files changed (1) hide show
  1. order_text_blocks.py +10 -6
order_text_blocks.py CHANGED
@@ -84,16 +84,20 @@ def get_hierarchy(labels):
84
  def textblock_ordering(textblocks, img):
85
  coords = []
86
  ori_coords = {}
 
87
  for obj in textblocks:
88
  x1, y1, x2, y2 = map(float, obj)
89
  ori_coords[tuple([x1, y1, x2, y2])] = [x1, y1, x2, y2]
90
  coords.append([x1, y1, x2, y2])
 
 
 
91
 
92
- # sort the textblocks by y1
93
- coords.sort(key = lambda x: x[1])
94
- # create vertical buckets of horizontal pixelsize of 15% of the image width
95
  buckets = []
96
- bucket_size = int(0.15 * img.shape[1])
97
  # put the textblocks in the buckets
98
  for coord in coords:
99
  if len(buckets) == 0:
@@ -106,9 +110,9 @@ def textblock_ordering(textblocks, img):
106
  else:
107
  buckets.append([coord])
108
 
109
- # sort each bucket by x1
110
  for bucket in buckets:
111
- bucket.sort(key = lambda x: x[0])
112
 
113
  # visualize the buckets one by one each with a different color
114
  # for bucket in buckets:
 
84
  def textblock_ordering(textblocks, img):
85
  coords = []
86
  ori_coords = {}
87
+ avg_width = 0
88
  for obj in textblocks:
89
  x1, y1, x2, y2 = map(float, obj)
90
  ori_coords[tuple([x1, y1, x2, y2])] = [x1, y1, x2, y2]
91
  coords.append([x1, y1, x2, y2])
92
+ avg_width += x2 - x1
93
+
94
+ avg_width /= len(coords)
95
 
96
+ # sort the textblocks by left x coordinate
97
+ coords.sort(key = lambda x: x[0])
98
+ # create vertical buckets based on avg. width of text blocks
99
  buckets = []
100
+ bucket_size = int(0.55 * avg_width)
101
  # put the textblocks in the buckets
102
  for coord in coords:
103
  if len(buckets) == 0:
 
110
  else:
111
  buckets.append([coord])
112
 
113
+ # sort each bucket by y1
114
  for bucket in buckets:
115
+ bucket.sort(key = lambda x: x[1])
116
 
117
  # visualize the buckets one by one each with a different color
118
  # for bucket in buckets: