Spaces: Runtime error
Commit 07111e1 · Parent(s): 2ffed31
Update app.py

app.py CHANGED
@@ -78,6 +78,8 @@ def iob_to_label(label):
     if label == 7:
         return 'others'
 
+
+
 # this method will detect if there is any intersect between two boxes or not
 def intersect(w, z):
     x1 = max(w[0], z[0]) #190 | 881 | 10
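As a side note to this hunk, here is a minimal, self-contained sketch of the axis-aligned overlap test that intersect() performs (its area check appears in the next hunk). The helper name and the sample boxes are illustrative, and the explicit per-axis comparison is used instead of a raw (x2-x1)*(y2-y1) product to avoid the case where both factors are negative.

def boxes_intersect(w, z):
    # boxes are [xmin, ymin, xmax, ymax]
    x1, y1 = max(w[0], z[0]), max(w[1], z[1])   # top-left corner of the overlap
    x2, y2 = min(w[2], z[2]), min(w[3], z[3])   # bottom-right corner of the overlap
    if x2 > x1 and y2 > y1:                     # positive width and height means a real overlap
        return [int(x1), int(y1), int(x2), int(y2)]
    return 0

print(boxes_intersect([10, 10, 50, 30], [40, 15, 90, 35]))  # [40, 15, 50, 30]
print(boxes_intersect([10, 10, 50, 30], [60, 40, 90, 80]))  # 0 (no overlap)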
@@ -90,18 +92,16 @@ def intersect(w, z):
     # because sometimes in annotating, it is possible to overlap rows or columns by mistake
     # for very small pixels, we check a threshold to delete them
     area = (x2-x1) * (y2-y1)
-    if (area > 0):
+    if (area > 0):
         return [int(x1), int(y1), int(x2), int(y2)]
     else:
         return 0
 
-
 def process_image(image):
     custom_config = r'--oem 3 --psm 6'
     # lang='eng+deu+ita+chi_sim'
-    lang='
+    lang='eng'
     width, height = image.size
-    feature_extractor = LayoutLMv3FeatureExtractor(apply_ocr=True,lang=lang)
     encoding_feature_extractor = feature_extractor(image, return_tensors="pt",truncation=True)
     words, boxes = encoding_feature_extractor.words, encoding_feature_extractor.boxes
 
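For context on the lang='eng' change above: LayoutLMv3's feature extractor can run Tesseract itself when apply_ocr=True and return the recognized words together with their normalized boxes. A hedged, minimal sketch of that step follows; the file name is illustrative and the OCR language is left at Tesseract's default here, since the removed constructor argument is not reproduced.

from PIL import Image
from transformers import LayoutLMv3FeatureExtractor

feature_extractor = LayoutLMv3FeatureExtractor(apply_ocr=True)  # requires Tesseract on the system
image = Image.open("page1.jpg").convert("RGB")                  # illustrative file name
encoding = feature_extractor(image, return_tensors="pt")
words, boxes = encoding["words"], encoding["boxes"]             # one list per input image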
@@ -131,13 +131,12 @@ def process_image(image):
     preds = []
     l_words = []
     bboxes = []
-    token_section_num = []
+    token_section_num = [] # related to more than 512 tokens
 
     if (len(token_boxes) == 512):
         predictions = [predictions]
         token_boxes = [token_boxes]
 
-
     for i in range(0, len(token_boxes)):
         for j in range(0, len(token_boxes[i])):
             #print(np.asarray(token_boxes[i][j]).shape)
@@ -162,11 +161,8 @@ def process_image(image):
                 # parts, so it's possible to have a word in both of the pages and we have to control that repetetive words
                 # HERE: because they're in a same section, so we can merge them safely
                 l_words[_index] = l_words[_index] + processor.tokenizer.decode(encoding["input_ids"][i][j])
-
             else:
                 continue
-
-
     return bboxes, preds, l_words, image
 
 
@@ -175,24 +171,27 @@ def visualize_image(final_bbox, final_preds, l_words, image):
 
     draw = ImageDraw.Draw(image)
     font = ImageFont.load_default()
+    #{0: 'document number', 1: 'elemento pn', 2: 'nombre del responsabile', 3: 'fecha', 4: 'internal reference', 5: 'others'}
 
+    #id2label = {0: 'song name', 1: 'artist', 2: 'year', 3: 'album', 4: 'genres', 5: 'song writer', 6: 'lyrics', 7: 'others'}
     label2color = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue', 'lyrics':'purple', 'others': 'white'}
     l2l = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue','lyrics':'purple', 'others':'white'}
     f_labels = {'song name':'red', 'artist':'blue', 'year':'black', 'album':'green', 'genres':'brown', 'song writer':'blue','lyrics':'purple', 'others':'white'}
 
-
     json_df = []
 
+    # draw bboxes on image
     for ix, (prediction, box) in enumerate(zip(final_preds, final_bbox)):
         predicted_label = iob_to_label(prediction).lower()
-
-
+        if (predicted_label != 'others'):
+            draw.rectangle(box, outline=label2color[predicted_label])
+            draw.text((box[0]+10, box[1]-10), text=predicted_label, fill=label2color[predicted_label], font=font)
 
-
-
-
+        json_dict = {}
+        json_dict['TEXT'] = l_words[ix]
+        json_dict['LABEL'] = f_labels[predicted_label]
 
-
+        json_df.append(json_dict)
     return image, json_df
 
 
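A small self-contained sketch of the drawing/JSON step this hunk adds: boxes whose label is not 'others' get a colored rectangle and a text tag, and every word is collected as a {'TEXT': ..., 'LABEL': ...} record. The image, boxes, and labels below are illustrative stand-ins for the app's data.

from PIL import Image, ImageDraw, ImageFont

image = Image.new("RGB", (300, 100), "white")
draw = ImageDraw.Draw(image)
font = ImageFont.load_default()
label2color = {'song name': 'red', 'artist': 'blue', 'others': 'white'}

final_bbox = [[20, 40, 120, 60], [140, 40, 220, 60]]
final_preds = ['song name', 'others']
l_words = ['Skyfall', 'page 1']

json_df = []
for ix, (pred, box) in enumerate(zip(final_preds, final_bbox)):
    if pred != 'others':
        draw.rectangle(box, outline=label2color[pred])                # box outline in the label color
        draw.text((box[0] + 10, box[1] - 10), text=pred,
                  fill=label2color[pred], font=font)                  # label tag above the box
    json_df.append({'TEXT': l_words[ix], 'LABEL': pred})              # every word becomes a record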
@@ -206,55 +205,106 @@ def mergeCloseBoxes(pr, bb, wr, threshold):
         if (pred=='others'):
             continue
         else:
-
-            final_preds.append(pred)
-            final_words.append(word)
+            flag = False
             for b, p, w in zip(bb, pr, wr):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if (p == 'others'):
+                    #print('others')
+                    #print('-------')
+                    continue
+                elif (box==b): # we shouldn't check each item with itself
+                    #print('itself')
+                    #print('--------')
+                    continue
+                else:
+                    XMIN, YMIN, XMAX, YMAX = box
+                    xmin, ymin, xmax, ymax = b
+                    #print('word: {} , w:{}'.format(word, w))
+                    intsc = intersect([XMIN, YMIN, XMAX+threshold, YMAX], [xmin-threshold, ymin, xmax, ymax])
+                    if (intsc != 0 and pred==p):
+                        flag = True
+                        #print('there is intersect')
+                        # if(abs(XMAX - xmin) < treshold and abs(YMIN - ymin) < 10):
+                        # we have to check if there is any intersection between box and all the values in final_bbox list
+                        # because if we have updated it before, now we have to update in final_bbox
+                        #print(final_bbox)
+                        print(*final_bbox, sep=",")
+                        merged_box = [
+                            min(XMIN, xmin),
+                            min(YMIN, ymin),
+                            max(XMAX, xmax),
+                            max(YMAX, ymax)
+                        ]
+                        merged_words = word + ' ' + w
+                        # add to final_bbox
+                        wasAvailable = False
+                        for id, fbox in enumerate(final_bbox):
+                            if (intersect(box, fbox) != 0 and pred==final_preds[id]):
+                                #print('added before!')
+                                # box is inside another processed box, so we have to update it
+                                wasAvailable = True
+                                merged_box = [
+                                    min(fbox[0], min(XMIN, xmin)),
+                                    min(fbox[1], min(YMIN, ymin)),
+                                    max(fbox[2], max(XMAX, xmax)),
+                                    max(fbox[3], max(YMAX, ymax))
+                                ]
+                                final_bbox[id] = merged_box
+                                final_words[id] = final_words[id] + ' ' + w
+                                break
+
+                        if (not wasAvailable):
+                            # there was no intersect, bbox is not added before
+                            #print('not added before, so we add merged box!')
+                            final_bbox.append(merged_box)
+                            final_preds.append(pred)
+                            final_words.append(merged_words)
+                        '''else:
+                            print()
+                            final_bbox.append(box)
+                            final_preds.append(pred)
+                            final_words.append(word)'''
+            if (flag == False):
+                #print('flag is false, word: {} added'.format(word))
+                # there is no intersect between word and the others
+                # we will check for last time if box is inside the others, because if the word is last word (like Juan + Mulian + Alexander) (Alexander)
+                # it is added before but it has not intersection with others, so we will check to prevent
+                for id, fbox in enumerate(final_bbox):
+                    if (intersect(box, fbox) != 0 and pred==final_preds[id]):
+                        flag = True
+
+                if (not flag):
+                    final_bbox.append(box)
+                    final_preds.append(pred)
+                    final_words.append(word)
+
     return final_bbox, final_preds, final_words
 
 def createDataframe(preds, words):
-    df = pd.DataFrame(columns = ['
-
-
-
-
-
-
-
-
-
-
-
-
-
+    df = pd.DataFrame(columns = ['song name', 'artist', 'year', 'album', 'genres', 'song writer', 'lyrics', 'others'])
+    if (len(preds) > 0):
+        flag_label = preds[0]
+        #print(preds)
+        #print(words)
+        #print('@@@@@')
+        #print(flag_label)
+        row_number = -1
+        for i in range(len(preds)):
+            #print('i is: {}'.format(i))
+            if (preds[i] == flag_label):
+                row_number = row_number + 1
+                df.at[row_number, preds[i]] = words[i]
+                #print('row number is: {}'.format(row_number))
+                continue
 
-
-
-
-
-
-
-
-
-
+            else:
+                #print('row_number {} is <= of df.shape {}'.format(row_number, df.shape[0]))
+                #print(pd.isna(df[preds[i]].iloc[row_number]))
+                #print(pd.isna(df[preds[i]].iloc[row_number]))
+                if(pd.isna(df[preds[i]].iloc[row_number])):
+                    df.at[row_number, preds[i]] = words[i]
+                else:
+                    row_number = row_number + 1
+                    df.at[row_number, preds[i]] = words[i]
 
     return df
 
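A toy walk-through of the merge rule this hunk introduces: two same-label word boxes are joined when one box, widened by threshold pixels on the right, still overlaps the other box widened by threshold pixels on the left; the result is the union box and the concatenated words. All values below are illustrative (the names come from the hunk's own comment).

threshold = 30
box_a, word_a = [100, 50, 160, 70], "Juan"
box_b, word_b = [170, 50, 260, 70], "Mulian"

# widen box_a to the right and box_b to the left, then test the overlap explicitly
XMIN, YMIN, XMAX, YMAX = box_a
xmin, ymin, xmax, ymax = box_b
ox1, oy1 = max(XMIN, xmin - threshold), max(YMIN, ymin)
ox2, oy2 = min(XMAX + threshold, xmax), min(YMAX, ymax)

if ox2 > ox1 and oy2 > oy1:                      # the widened boxes overlap, so merge
    merged_box = [min(XMIN, xmin), min(YMIN, ymin), max(XMAX, xmax), max(YMAX, ymax)]
    merged_words = word_a + ' ' + word_b
    print(merged_box, merged_words)              # [100, 50, 260, 70] Juan Mulian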
@@ -288,7 +338,7 @@ def removeSimilarItems(final_bbox, final_preds, final_words):
 
 def process_form(preds, words, bboxes):
 
-    final_bbox, final_preds, final_words = mergeCloseBoxes(preds, bboxes, words,
+    final_bbox, final_preds, final_words = mergeCloseBoxes(preds, bboxes, words, 30)
     _bbox, _preds, _words = removeSimilarItems(final_bbox, final_preds, final_words)
     # convert float list to int
     _bbox = [[int(x) for x in item ] for item in _bbox]
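The createDataframe logic added in the previous hunk follows a simple row-building rule: the first label seen marks the start of a record, so each time it reappears a new row begins, and other labels fill the current row unless their cell is already occupied. A compact sketch of that rule with illustrative data:

import pandas as pd

preds = ['song name', 'artist', 'song name', 'artist']
words = ['Hello', 'Adele', 'Skyfall', 'Adele']

df = pd.DataFrame(columns=['song name', 'artist', 'year', 'album',
                           'genres', 'song writer', 'lyrics', 'others'])
row = -1
for pred, word in zip(preds, words):
    if pred == preds[0]:
        row += 1                           # the first-seen label starts a new record
        df.at[row, pred] = word
    elif pd.isna(df[pred].iloc[row]):      # cell still empty, so it belongs to the same record
        df.at[row, pred] = word
    else:                                  # cell already taken, spill into a new row
        row += 1
        df.at[row, pred] = word
print(df[['song name', 'artist']])         # two rows, one per song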
@@ -319,8 +369,6 @@ def mergeImageVertical(a):
     imgs_comb.save( 'Trifecta_vertical.jpg' )
     return imgs_comb
 
-
-
 def completepreprocess(pdffile):
     myDataFrame = pd.DataFrame()
     a=[]
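The diff only shows mergeImageVertical saving 'Trifecta_vertical.jpg', not its body, so the following is a generic PIL recipe for stacking per-page renders into one tall image, not the app's actual implementation:

from PIL import Image

def stack_vertically(paths, out_path="Trifecta_vertical.jpg"):
    imgs = [Image.open(p).convert("RGB") for p in paths]
    width = max(im.width for im in imgs)
    canvas = Image.new("RGB", (width, sum(im.height for im in imgs)), "white")
    y = 0
    for im in imgs:
        canvas.paste(im, (0, y))   # paste each page below the previous one
        y += im.height
    canvas.save(out_path)
    return canvas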
@@ -334,6 +382,8 @@ def completepreprocess(pdffile):
             images = Image.open("page"+str(i)+".jpg")
             image = images.convert("RGB")
             bbox, preds, words, image = process_image(image)
+            print(preds)
+            print(words)
             im, df = visualize_image(bbox, preds, words, image)
             im1 = im.save("page"+str(i)+".jpg")
             a.append("page"+str(i)+".jpg")
@@ -358,6 +408,7 @@ css = """.output_image, .input_image {height: 600px !important}"""
 # ["744BJQ69.PDF"], ['tarros_2.jpg'],
 #examples = [['test1.jpg'], ['doc1.pdf'], ['doc1.2.pdf']]
 
+
 iface = gr.Interface(fn=completepreprocess,
                      #inputs=gr.inputs.Image(type="pil",optional=True,label="upload file"),
                      inputs=gr.File(label="PDF"),
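For reference, a hedged sketch of how a gr.Interface like this is typically completed and launched. The output components and the launch call are illustrative, since the diff truncates the call after inputs; it assumes the completepreprocess function and css string defined earlier in app.py.

import gradio as gr

iface = gr.Interface(
    fn=completepreprocess,                       # defined earlier in app.py
    inputs=gr.File(label="PDF"),
    outputs=[gr.Image(label="annotated page"),   # illustrative output components
             gr.Dataframe(label="extracted fields")],
    css=css,
)
iface.launch()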