BMukhtar committed on
Commit
29ae74e
·
1 Parent(s): 2d38cb5
Files changed (1) hide show
  1. app.py +93 -83
app.py CHANGED
@@ -84,7 +84,7 @@ for d in dirs:
84
  font_path = models_dir + "/Ubuntu-Regular.ttf"
85
  reader = easyocr.Reader(
86
  ['en'],
87
- gpu=False,
88
  recog_network='best_norm_ED',
89
  detect_network="craft",
90
  user_network_directory=models_dir,
@@ -106,14 +106,7 @@ uploaded_file = st.file_uploader("Өз файлыңызды осында жүк
106
 
107
  col1, col2 = st.columns(2)
108
 
109
- #def process_page(page):
110
- # image_matrix = fitz.Matrix(fitz.Identity)
111
- # pixmap = page.get_pixmap(matrix=image_matrix, dpi=300)
112
- # image_data = pixmap.samples# This is a bytes object
113
- # image = Image.from("RGB",(pixmap.width, pixmap.height),image_data)
114
- # image = Image.from("RGB", (pixmap.width, pixmap.height), image_data)
115
- # result = reader.readtext(np.array(image),paragraph=True)
116
- # return image, result
117
  import time
118
 
119
  max_page = 5
@@ -121,7 +114,7 @@ def recognize_page_image(image):
121
  start = time.time()
122
  result = [[0,"Sample 1"],[1,"Sample 2"]]
123
  result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
124
- result = get_paragraph(result, y_ths=0, x_ths = 0)
125
  end = time.time()
126
  return result,(end-start)
127
 
@@ -163,79 +156,96 @@ def process_pdf(uploaded_file):
163
  #col1.write("</div>",unsafe_allow_html=True)
164
  progress_bar.progress(0.99,text=f'{min(total_pages,max_page)} бет жүктелді')
165
 
166
def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode='ltr'):
    """Merge OCR detections into paragraphs and order the text inside each.

    Detections are clustered greedily: a new group is seeded with the first
    ungrouped box, then ungrouped boxes are pulled in while they lie close
    enough to any box already in the group. Inside each group the boxes are
    emitted line by line (top to bottom) and, within a line, in reading order.

    Args:
        raw_result: Iterable of detections where ``item[0]`` is a sequence of
            (x, y) corner points and ``item[1]`` is the recognised text.
        x_ths: Horizontal tolerance, as a multiple of the group's mean box
            height, added on both sides of a group member's x-range.
        y_ths: Vertical tolerance, as a multiple of the group's mean box
            height, added above and below a group member's y-range.
        mode: ``'ltr'`` to read each line left to right, ``'rtl'`` for right
            to left.

    Returns:
        A list of ``[bounding_box, text]`` entries, one per paragraph, where
        ``bounding_box`` is
        ``[[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]``.

    Raises:
        ValueError: If ``mode`` is neither ``'ltr'`` nor ``'rtl'``.
    """
    # Fail early with a clear message; previously an unknown mode surfaced as
    # an UnboundLocalError deep inside the ordering loop.
    if mode not in ('ltr', 'rtl'):
        raise ValueError(f"mode must be 'ltr' or 'rtl', got {mode!r}")

    # Create basic attributes. Each entry is
    # [text, min_x, max_x, min_y, max_y, height, center_y, group];
    # group 0 means "not assigned to any paragraph yet".
    box_group = []
    for detection in raw_result:
        all_x = [int(coord[0]) for coord in detection[0]]
        all_y = [int(coord[1]) for coord in detection[0]]
        min_x, max_x = min(all_x), max(all_x)
        min_y, max_y = min(all_y), max(all_y)
        box_group.append([detection[1], min_x, max_x, min_y, max_y,
                          max_y - min_y, 0.5 * (min_y + max_y), 0])

    # Cluster boxes into paragraphs.
    current_group = 1
    while True:
        box_group0 = [box for box in box_group if box[7] == 0]  # ungrouped
        if not box_group0:
            break

        current_box_group = [box for box in box_group if box[7] == current_group]
        if not current_box_group:
            # Seed a new group with the first ungrouped box.
            box_group0[0][7] = current_group
            continue

        mean_height = np.mean([box[5] for box in current_box_group])
        add_box = False
        for member in current_box_group:
            min_gx = member[1] - x_ths * mean_height
            max_gx = member[2] + x_ths * mean_height
            min_gy = member[3] - y_ths * mean_height
            max_gy = member[4] + y_ths * mean_height
            for candidate in box_group0:
                same_horizontal_level = (
                    (min_gx <= candidate[1] <= max_gx)
                    or (min_gx <= candidate[2] <= max_gx)
                )
                same_vertical_level = (min_gy <= candidate[6] <= max_gy)
                if same_horizontal_level and same_vertical_level:
                    candidate[7] = current_group
                    add_box = True
                    # At most one box is attached per group member per pass.
                    break

        # Cannot add more boxes: move on to the next group.
        if not add_box:
            current_group += 1

    # Arrange the reading order inside each paragraph. Group ids are sorted
    # so the output order does not depend on set iteration order.
    result = []
    for group in sorted({box[7] for box in box_group}):
        current_box_group = [box for box in box_group if box[7] == group]
        mean_height = np.mean([box[5] for box in current_box_group])
        min_gx = min(box[1] for box in current_box_group)
        max_gx = max(box[2] for box in current_box_group)
        min_gy = min(box[3] for box in current_box_group)
        max_gy = max(box[4] for box in current_box_group)

        words = []
        while current_box_group:
            highest = min(box[6] for box in current_box_group)
            line_limit = highest + 0.4 * mean_height
            candidates = [box for box in current_box_group if box[6] < line_limit]
            if not candidates:
                # Only reachable when mean_height is 0 (degenerate boxes);
                # fall back to the boxes sitting exactly on the top line.
                candidates = [box for box in current_box_group if box[6] == highest]

            # reversed() keeps the historical tie-break: among boxes sharing
            # the extreme x, the one latest in the list is taken first.
            if mode == 'ltr':
                best_box = min(reversed(candidates), key=lambda box: box[1])
            else:
                best_box = max(reversed(candidates), key=lambda box: box[2])
            words.append(best_box[0])
            current_box_group.remove(best_box)

        result.append([
            [[min_gx, min_gy], [max_gx, min_gy], [max_gx, max_gy], [min_gx, max_gy]],
            ' '.join(words),
        ])

    return result
 
 
 
 
 
 
 
239
 
240
 
241
  if uploaded_file is not None:
@@ -252,8 +262,8 @@ if uploaded_file is not None:
252
  image = Image.open(uploaded_file)
253
  #with open(os.path.join("tempDir",image_file))
254
  col1.image(image)
255
- result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
256
- result = get_paragraph(result, y_ths=0)
257
  result_text = "\n\n".join([item[1] for item in result])
258
  button_group_html = generateButtonGroup(result)
259
  col2.write(button_group_html, unsafe_allow_html=True)
 
84
  font_path = models_dir + "/Ubuntu-Regular.ttf"
85
  reader = easyocr.Reader(
86
  ['en'],
87
+ gpu=True,
88
  recog_network='best_norm_ED',
89
  detect_network="craft",
90
  user_network_directory=models_dir,
 
106
 
107
  col1, col2 = st.columns(2)
108
 
109
+
 
 
 
 
 
 
 
110
  import time
111
 
112
  max_page = 5
 
114
  start = time.time()
115
  result = [[0,"Sample 1"],[1,"Sample 2"]]
116
  result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
117
+ result = get_paragraph(result)
118
  end = time.time()
119
  return result,(end-start)
120
 
 
156
  #col1.write("</div>",unsafe_allow_html=True)
157
  progress_bar.progress(0.99,text=f'{min(total_pages,max_page)} бет жүктелді')
158
 
159
class TextBox:
    """One OCR detection reduced to its axis-aligned bounding box.

    Attributes:
        text: Recognised text of the detection.
        min_x, max_x, min_y, max_y: Extremes of the corner points, each
            coordinate truncated with ``int()``.
        height: ``max_y - min_y``.
        center_y: Vertical midpoint of the box.
        group_id: Paragraph group the box belongs to; 0 means ungrouped.
    """

    def __init__(self, text, coordinates):
        """Build the box from a detection's text and its corner points.

        Args:
            text: Recognised text.
            coordinates: Iterable of points indexable as ``point[0]`` (x) and
                ``point[1]`` (y). Only the extremes are used, so the order of
                the corners does not matter.

        Raises:
            ValueError: If ``coordinates`` contains no points.
        """
        # Materialise once so one-shot iterables (generators) work too.
        points = [(int(point[0]), int(point[1])) for point in coordinates]
        if not points:
            raise ValueError("coordinates must contain at least one point")
        x_coords, y_coords = zip(*points)

        self.text = text
        self.min_x = min(x_coords)
        self.max_x = max(x_coords)
        self.min_y = min(y_coords)
        self.max_y = max(y_coords)
        self.height = self.max_y - self.min_y
        self.center_y = 0.5 * (self.min_y + self.max_y)
        self.group_id = 0  # Initially ungrouped

    def __repr__(self):
        return f"TextBox(text={self.text}, group_id={self.group_id})"
176
+
177
+
178
def get_paragraph(ocr_results, horizontal_threshold=0.5, vertical_threshold=0.0, reading_mode='ltr'):
    """Merge OCR detections into paragraphs and order the text inside each.

    Detections are wrapped in ``TextBox`` objects and clustered greedily: a
    new group is seeded with the first ungrouped box, then ungrouped boxes are
    pulled in while they lie close enough to any box already in the group.
    Inside each group the boxes are emitted line by line (top to bottom) and,
    within a line, in reading order.

    Args:
        ocr_results: Iterable of detections where ``item[0]`` is the sequence
            of corner points and ``item[1]`` is the recognised text.
        horizontal_threshold: Horizontal tolerance, as a multiple of the
            group's average box height, added on both sides of a group
            member's x-range.
        vertical_threshold: Vertical tolerance, as a multiple of the group's
            average box height, added above and below a group member's
            y-range.
        reading_mode: ``'ltr'`` to read each line left to right, ``'rtl'``
            for right to left.

    Returns:
        A list of ``[bounding_box, text]`` entries, one per paragraph, where
        ``bounding_box`` is
        ``[[min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y]]``
        and ``text`` is the space-joined, stripped paragraph text.

    Raises:
        ValueError: If ``reading_mode`` is neither ``'ltr'`` nor ``'rtl'``.
    """
    # Fail early with a clear message; previously an unknown mode surfaced as
    # an UnboundLocalError deep inside the ordering loop.
    if reading_mode not in ('ltr', 'rtl'):
        raise ValueError(f"reading_mode must be 'ltr' or 'rtl', got {reading_mode!r}")

    # Convert raw OCR results into TextBox objects.
    text_boxes = [TextBox(item[1], item[0]) for item in ocr_results]

    # Group the boxes into paragraphs.
    current_group_id = 1
    while True:
        ungrouped_boxes = [box for box in text_boxes if box.group_id == 0]
        if not ungrouped_boxes:
            break

        current_group_boxes = [box for box in text_boxes if box.group_id == current_group_id]
        if not current_group_boxes:
            # Seed a new group with the first ungrouped box.
            ungrouped_boxes[0].group_id = current_group_id
            continue

        average_height = np.mean([box.height for box in current_group_boxes])
        added_to_group = False
        for group_box in current_group_boxes:
            min_group_x = group_box.min_x - horizontal_threshold * average_height
            max_group_x = group_box.max_x + horizontal_threshold * average_height
            min_group_y = group_box.min_y - vertical_threshold * average_height
            max_group_y = group_box.max_y + vertical_threshold * average_height

            for ungrouped_box in ungrouped_boxes:
                horizontally_aligned = (
                    (min_group_x <= ungrouped_box.min_x <= max_group_x)
                    or (min_group_x <= ungrouped_box.max_x <= max_group_x)
                )
                vertically_aligned = (min_group_y <= ungrouped_box.center_y <= max_group_y)
                if horizontally_aligned and vertically_aligned:
                    ungrouped_box.group_id = current_group_id
                    added_to_group = True
                    # At most one box is attached per group member per pass.
                    break

        # If no box was added to the current group, move to the next group.
        if not added_to_group:
            current_group_id += 1

    # Arrange the text order within each group to form paragraphs. Group ids
    # are sorted so the output order does not depend on set iteration order.
    paragraphs = []
    for group_id in sorted({box.group_id for box in text_boxes}):
        boxes_in_group = [box for box in text_boxes if box.group_id == group_id]
        average_height = np.mean([box.height for box in boxes_in_group])
        min_group_x = min(box.min_x for box in boxes_in_group)
        max_group_x = max(box.max_x for box in boxes_in_group)
        min_group_y = min(box.min_y for box in boxes_in_group)
        max_group_y = max(box.max_y for box in boxes_in_group)

        words = []
        while boxes_in_group:
            highest_y = min(box.center_y for box in boxes_in_group)
            line_limit = highest_y + 0.4 * average_height
            line_candidates = [box for box in boxes_in_group if box.center_y < line_limit]
            if not line_candidates:
                # Only reachable when average_height is 0 (degenerate boxes);
                # fall back to the boxes sitting exactly on the top line.
                line_candidates = [box for box in boxes_in_group if box.center_y == highest_y]

            # reversed() keeps the historical tie-break: among boxes sharing
            # the extreme x, the one latest in the list is taken first.
            if reading_mode == 'ltr':
                selected_box = min(reversed(line_candidates), key=lambda box: box.min_x)
            else:
                selected_box = max(reversed(line_candidates), key=lambda box: box.max_x)

            words.append(selected_box.text)
            boxes_in_group.remove(selected_box)

        # Append the bounding box and text for the paragraph.
        paragraphs.append([
            [[min_group_x, min_group_y], [max_group_x, min_group_y],
             [max_group_x, max_group_y], [min_group_x, max_group_y]],
            ' '.join(words).strip(),
        ])

    return paragraphs
249
 
250
 
251
  if uploaded_file is not None:
 
262
  image = Image.open(uploaded_file)
263
  #with open(os.path.join("tempDir",image_file))
264
  col1.image(image)
265
+ result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0, text_threshold=0.3)
266
+ result = get_paragraph(result)
267
  result_text = "\n\n".join([item[1] for item in result])
268
  button_group_html = generateButtonGroup(result)
269
  col2.write(button_group_html, unsafe_allow_html=True)