alitavanaali committed on
Commit
07111e1
·
1 Parent(s): 2ffed31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -63
app.py CHANGED
@@ -78,6 +78,8 @@ def iob_to_label(label):
78
  if label == 7:
79
  return 'others'
80
 
 
 
81
  # this function detects whether there is any intersection between two boxes
82
  def intersect(w, z):
83
  x1 = max(w[0], z[0]) #190 | 881 | 10
@@ -90,18 +92,16 @@ def intersect(w, z):
90
  # because sometimes in annotating, it is possible to overlap rows or columns by mistake
91
  # for very small pixels, we check a threshold to delete them
92
  area = (x2-x1) * (y2-y1)
93
- if (area > 0): #500 is minumum accepted area
94
  return [int(x1), int(y1), int(x2), int(y2)]
95
  else:
96
  return 0
97
 
98
-
99
  def process_image(image):
100
  custom_config = r'--oem 3 --psm 6'
101
  # lang='eng+deu+ita+chi_sim'
102
- lang='spa'
103
  width, height = image.size
104
- feature_extractor = LayoutLMv3FeatureExtractor(apply_ocr=True,lang=lang)
105
  encoding_feature_extractor = feature_extractor(image, return_tensors="pt",truncation=True)
106
  words, boxes = encoding_feature_extractor.words, encoding_feature_extractor.boxes
107
 
@@ -131,13 +131,12 @@ def process_image(image):
131
  preds = []
132
  l_words = []
133
  bboxes = []
134
- token_section_num = []
135
 
136
  if (len(token_boxes) == 512):
137
  predictions = [predictions]
138
  token_boxes = [token_boxes]
139
 
140
-
141
  for i in range(0, len(token_boxes)):
142
  for j in range(0, len(token_boxes[i])):
143
  #print(np.asarray(token_boxes[i][j]).shape)
@@ -162,11 +161,8 @@ def process_image(image):
162
  # parts, so it's possible to have a word in both of the pages and we have to handle those repeated words
163
  # HERE: they're in the same section, so we can merge them safely
164
  l_words[_index] = l_words[_index] + processor.tokenizer.decode(encoding["input_ids"][i][j])
165
-
166
  else:
167
  continue
168
-
169
-
170
  return bboxes, preds, l_words, image
171
 
172
 
@@ -175,24 +171,27 @@ def visualize_image(final_bbox, final_preds, l_words, image):
175
 
176
  draw = ImageDraw.Draw(image)
177
  font = ImageFont.load_default()
 
178
 
 
179
  label2color = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue', 'lyrics':'purple', 'others': 'white'}
180
  l2l = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue','lyrics':'purple', 'others':'white'}
181
  f_labels = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue','lyrics':'purple', 'others':'white'}
182
 
183
-
184
  json_df = []
185
 
 
186
  for ix, (prediction, box) in enumerate(zip(final_preds, final_bbox)):
187
  predicted_label = iob_to_label(prediction).lower()
188
- draw.rectangle(box, outline=label2color[predicted_label])
189
- draw.text((box[0]+10, box[1]-10), text=predicted_label, fill=label2color[predicted_label], font=font)
 
190
 
191
- json_dict = {}
192
- json_dict['TEXT'] = l_words[ix]
193
- json_dict['LABEL'] = f_labels[predicted_label]
194
 
195
- json_df.append(json_dict)
196
  return image, json_df
197
 
198
 
@@ -206,55 +205,106 @@ def mergeCloseBoxes(pr, bb, wr, threshold):
206
  if (pred=='others'):
207
  continue
208
  else:
209
- final_bbox.append(box)
210
- final_preds.append(pred)
211
- final_words.append(word)
212
  for b, p, w in zip(bb, pr, wr):
213
- if (p == 'others'):
214
- continue
215
- elif (box==b): # we shouldn't check each item with itself
216
- continue
217
- else:
218
- XMIN, YMIN, XMAX, YMAX = box
219
- xmin, ymin, xmax, ymax = b
220
- intsc = intersect([XMIN, YMIN, XMAX+threshold, YMAX], [xmin-threshold, ymin, xmax, ymax])
221
- if (intsc != 0 and pred==p):
222
- #if(abs(XMAX - xmin) < treshold and abs(YMIN - ymin) < 10):
223
- if(box in final_bbox):
224
- final_bbox[idx]= [XMIN, min(YMIN, ymin), xmax, max(YMAX, ymax)]
225
- final_words[idx] = word + ' ' + w
226
- continue
227
-
228
- print('box: {}, label: {} is close to b:{} with this p:{}--> {}'.format(box, pred, b, p, word + ' ' + w))
229
-
230
- idx = idx +1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  return final_bbox, final_preds, final_words
232
 
233
  def createDataframe(preds, words):
234
- df = pd.DataFrame(columns = ['container id' ,'seal number', 'container quantity', 'container type', 'package quantity', 'tare', 'weight'])
235
- flag_label = preds[0]
236
- #print(preds)
237
- #print(words)
238
- #print('@@@@@')
239
- #print(flag_label)
240
- row_number = -1
241
- for i in range(len(preds)):
242
- #print('i is: {}'.format(i))
243
- if (preds[i] == flag_label):
244
- row_number = row_number + 1
245
- df.at[row_number, preds[i]] = words[i]
246
- #print('row number is: {}'.format(row_number))
247
- continue
 
248
 
249
- else:
250
- #print('row_number {} is <= of df.shape {}'.format(row_number, df.shape[0]))
251
- #print(pd.isna(df[preds[i]].iloc[row_number]))
252
- #print(pd.isna(df[preds[i]].iloc[row_number]))
253
- if(pd.isna(df[preds[i]].iloc[row_number])):
254
- df.at[row_number, preds[i]] = words[i]
255
- else:
256
- row_number = row_number + 1
257
- df.at[row_number, preds[i]] = words[i]
258
 
259
  return df
260
 
@@ -288,7 +338,7 @@ def removeSimilarItems(final_bbox, final_preds, final_words):
288
 
289
  def process_form(preds, words, bboxes):
290
 
291
- final_bbox, final_preds, final_words = mergeCloseBoxes(preds, bboxes, words, 70)
292
  _bbox, _preds, _words = removeSimilarItems(final_bbox, final_preds, final_words)
293
  # convert float list to int
294
  _bbox = [[int(x) for x in item ] for item in _bbox]
@@ -319,8 +369,6 @@ def mergeImageVertical(a):
319
  imgs_comb.save( 'Trifecta_vertical.jpg' )
320
  return imgs_comb
321
 
322
-
323
-
324
  def completepreprocess(pdffile):
325
  myDataFrame = pd.DataFrame()
326
  a=[]
@@ -334,6 +382,8 @@ def completepreprocess(pdffile):
334
  images = Image.open("page"+str(i)+".jpg")
335
  image = images.convert("RGB")
336
  bbox, preds, words, image = process_image(image)
 
 
337
  im, df = visualize_image(bbox, preds, words, image)
338
  im1 = im.save("page"+str(i)+".jpg")
339
  a.append("page"+str(i)+".jpg")
@@ -358,6 +408,7 @@ css = """.output_image, .input_image {height: 600px !important}"""
358
  # ["744BJQ69.PDF"], ['tarros_2.jpg'],
359
  #examples = [['test1.jpg'], ['doc1.pdf'], ['doc1.2.pdf']]
360
 
 
361
  iface = gr.Interface(fn=completepreprocess,
362
  #inputs=gr.inputs.Image(type="pil",optional=True,label="upload file"),
363
  inputs=gr.File(label="PDF"),
 
78
  if label == 7:
79
  return 'others'
80
 
81
+
82
+
83
  # this function detects whether there is any intersection between two boxes
84
  def intersect(w, z):
85
  x1 = max(w[0], z[0]) #190 | 881 | 10
 
92
  # because sometimes in annotating, it is possible to overlap rows or columns by mistake
93
  # for very small pixels, we check a threshold to delete them
94
  area = (x2-x1) * (y2-y1)
95
+ if (area > 0):
96
  return [int(x1), int(y1), int(x2), int(y2)]
97
  else:
98
  return 0
99
 
 
100
  def process_image(image):
101
  custom_config = r'--oem 3 --psm 6'
102
  # lang='eng+deu+ita+chi_sim'
103
+ lang='eng'
104
  width, height = image.size
 
105
  encoding_feature_extractor = feature_extractor(image, return_tensors="pt",truncation=True)
106
  words, boxes = encoding_feature_extractor.words, encoding_feature_extractor.boxes
107
 
 
131
  preds = []
132
  l_words = []
133
  bboxes = []
134
+ token_section_num = [] # related to more than 512 tokens
135
 
136
  if (len(token_boxes) == 512):
137
  predictions = [predictions]
138
  token_boxes = [token_boxes]
139
 
 
140
  for i in range(0, len(token_boxes)):
141
  for j in range(0, len(token_boxes[i])):
142
  #print(np.asarray(token_boxes[i][j]).shape)
 
161
  # parts, so it's possible to have a word in both of the pages and we have to handle those repeated words
162
  # HERE: they're in the same section, so we can merge them safely
163
  l_words[_index] = l_words[_index] + processor.tokenizer.decode(encoding["input_ids"][i][j])
 
164
  else:
165
  continue
 
 
166
  return bboxes, preds, l_words, image
167
 
168
 
 
171
 
172
  draw = ImageDraw.Draw(image)
173
  font = ImageFont.load_default()
174
+ #{0: 'document number', 1: 'elemento pn', 2: 'nombre del responsabile', 3: 'fecha', 4: 'internal reference', 5: 'others'}
175
 
176
+ #id2label = {0: 'song name', 1: 'artist', 2: 'year', 3: 'album', 4: 'genres', 5: 'song writer', 6: 'lyrics', 7: 'others'}
177
  label2color = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue', 'lyrics':'purple', 'others': 'white'}
178
  l2l = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue','lyrics':'purple', 'others':'white'}
179
  f_labels = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue','lyrics':'purple', 'others':'white'}
180
 
 
181
  json_df = []
182
 
183
+ # draw bboxes on image
184
  for ix, (prediction, box) in enumerate(zip(final_preds, final_bbox)):
185
  predicted_label = iob_to_label(prediction).lower()
186
+ if (predicted_label != 'others'):
187
+ draw.rectangle(box, outline=label2color[predicted_label])
188
+ draw.text((box[0]+10, box[1]-10), text=predicted_label, fill=label2color[predicted_label], font=font)
189
 
190
+ json_dict = {}
191
+ json_dict['TEXT'] = l_words[ix]
192
+ json_dict['LABEL'] = f_labels[predicted_label]
193
 
194
+ json_df.append(json_dict)
195
  return image, json_df
196
 
197
 
 
205
  if (pred=='others'):
206
  continue
207
  else:
208
+ flag = False
 
 
209
  for b, p, w in zip(bb, pr, wr):
210
+ if (p == 'others'):
211
+ #print('others')
212
+ #print('-------')
213
+ continue
214
+ elif (box==b): # we shouldn't check each item with itself
215
+ #print('itself')
216
+ #print('--------')
217
+ continue
218
+ else:
219
+ XMIN, YMIN, XMAX, YMAX = box
220
+ xmin, ymin, xmax, ymax = b
221
+ #print('word: {} , w:{}'.format(word, w))
222
+ intsc = intersect([XMIN, YMIN, XMAX+threshold, YMAX], [xmin-threshold, ymin, xmax, ymax])
223
+ if (intsc != 0 and pred==p):
224
+ flag = True
225
+ #print('there is intersect')
226
+ # if(abs(XMAX - xmin) < treshold and abs(YMIN - ymin) < 10):
227
+ # we have to check whether box intersects any of the boxes already stored in final_bbox,
228
+ # because if it was merged before, we have to update that existing entry in final_bbox
229
+ #print(final_bbox)
230
+ print(*final_bbox, sep=",")
231
+ merged_box = [
232
+ min(XMIN, xmin),
233
+ min(YMIN, ymin),
234
+ max(XMAX, xmax),
235
+ max(YMAX, ymax)
236
+ ]
237
+ merged_words = word + ' ' + w
238
+ # add to final_bbox
239
+ wasAvailable = False
240
+ for id, fbox in enumerate(final_bbox):
241
+ if (intersect(box, fbox) != 0 and pred==final_preds[id]):
242
+ #print('added before!')
243
+ # box is inside another processed box, so we have to update it
244
+ wasAvailable = True
245
+ merged_box = [
246
+ min(fbox[0], min(XMIN, xmin)),
247
+ min(fbox[1], min(YMIN, ymin)),
248
+ max(fbox[2], max(XMAX, xmax)),
249
+ max(fbox[3], max(YMAX, ymax))
250
+ ]
251
+ final_bbox[id] = merged_box
252
+ final_words[id] = final_words[id] + ' ' + w
253
+ break
254
+
255
+ if (not wasAvailable):
256
+ # there was no intersection, so the bbox has not been added before
257
+ #print('not added before, so we add merged box!')
258
+ final_bbox.append(merged_box)
259
+ final_preds.append(pred)
260
+ final_words.append(merged_words)
261
+ '''else:
262
+ print()
263
+ final_bbox.append(box)
264
+ final_preds.append(pred)
265
+ final_words.append(word)'''
266
+ if (flag == False):
267
+ #print('flag is false, word: {} added'.format(word))
268
+ # there is no intersection between this word and the others
269
+ # check one last time whether box lies inside an already-added box: the last word of a group (e.g. Alexander in Juan + Mulian + Alexander)
270
+ # may have been merged already without intersecting any other word, so this check prevents adding it twice
271
+ for id, fbox in enumerate(final_bbox):
272
+ if (intersect(box, fbox) != 0 and pred==final_preds[id]):
273
+ flag = True
274
+
275
+ if (not flag):
276
+ final_bbox.append(box)
277
+ final_preds.append(pred)
278
+ final_words.append(word)
279
+
280
  return final_bbox, final_preds, final_words
281
 
282
  def createDataframe(preds, words):
283
+ df = pd.DataFrame(columns = ['song name', 'artist', 'year', 'album', 'genres', 'song writer', 'lyrics', 'others'])
284
+ if (len(preds) > 0):
285
+ flag_label = preds[0]
286
+ #print(preds)
287
+ #print(words)
288
+ #print('@@@@@')
289
+ #print(flag_label)
290
+ row_number = -1
291
+ for i in range(len(preds)):
292
+ #print('i is: {}'.format(i))
293
+ if (preds[i] == flag_label):
294
+ row_number = row_number + 1
295
+ df.at[row_number, preds[i]] = words[i]
296
+ #print('row number is: {}'.format(row_number))
297
+ continue
298
 
299
+ else:
300
+ #print('row_number {} is <= of df.shape {}'.format(row_number, df.shape[0]))
301
+ #print(pd.isna(df[preds[i]].iloc[row_number]))
302
+ #print(pd.isna(df[preds[i]].iloc[row_number]))
303
+ if(pd.isna(df[preds[i]].iloc[row_number])):
304
+ df.at[row_number, preds[i]] = words[i]
305
+ else:
306
+ row_number = row_number + 1
307
+ df.at[row_number, preds[i]] = words[i]
308
 
309
  return df
310
 
 
338
 
339
  def process_form(preds, words, bboxes):
340
 
341
+ final_bbox, final_preds, final_words = mergeCloseBoxes(preds, bboxes, words, 30)
342
  _bbox, _preds, _words = removeSimilarItems(final_bbox, final_preds, final_words)
343
  # convert float list to int
344
  _bbox = [[int(x) for x in item ] for item in _bbox]
 
369
  imgs_comb.save( 'Trifecta_vertical.jpg' )
370
  return imgs_comb
371
 
 
 
372
  def completepreprocess(pdffile):
373
  myDataFrame = pd.DataFrame()
374
  a=[]
 
382
  images = Image.open("page"+str(i)+".jpg")
383
  image = images.convert("RGB")
384
  bbox, preds, words, image = process_image(image)
385
+ print(preds)
386
+ print(words)
387
  im, df = visualize_image(bbox, preds, words, image)
388
  im1 = im.save("page"+str(i)+".jpg")
389
  a.append("page"+str(i)+".jpg")
 
408
  # ["744BJQ69.PDF"], ['tarros_2.jpg'],
409
  #examples = [['test1.jpg'], ['doc1.pdf'], ['doc1.2.pdf']]
410
 
411
+
412
  iface = gr.Interface(fn=completepreprocess,
413
  #inputs=gr.inputs.Image(type="pil",optional=True,label="upload file"),
414
  inputs=gr.File(label="PDF"),