phamvi856 committed
Commit 042436f · Parent: 403dbbc

Update app.py

Files changed (1)
  1. app.py +15 -26
app.py CHANGED
```diff
@@ -21,13 +21,12 @@ from PIL import Image, ImageDraw, ImageFont
 processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
 model = AutoModelForTokenClassification.from_pretrained("Theivaprakasham/layoutlmv3-finetuned-invoice")
 
-
-
 # load image example
 dataset = load_dataset("darentang/generated", split="test")
 Image.open(dataset[2]["image_path"]).convert("RGB").save("example1.png")
 Image.open(dataset[1]["image_path"]).convert("RGB").save("example2.png")
 Image.open(dataset[0]["image_path"]).convert("RGB").save("example3.png")
+
 # define id2label, label2color
 labels = dataset.features['ner_tags'].feature.names
 id2label = {v: k for v, k in enumerate(labels)}
@@ -44,33 +43,22 @@ label2color = {
     "B-TOTAL": 'blue',
     "I-BILLER_ADDRESS": 'blue',
     "O": 'orange'
-}
+}
 
 def unnormalize_box(bbox, width, height):
-    return [
-        width * (bbox[0] / 1000),
-        height * (bbox[1] / 1000),
-        width * (bbox[2] / 1000),
-        height * (bbox[3] / 1000),
-    ]
+    return [
+        width * (bbox[0] / 1000),
+        height * (bbox[1] / 1000),
+        width * (bbox[2] / 1000),
+        height * (bbox[3] / 1000),
+    ]
 
 
 def iob_to_label(label):
     return label
 
 
-def print_content(image, predictions, token_boxes, ner_tags):
-    width, height = image.size
-    content_list = []
-    for prediction, box, tag in zip(predictions, token_boxes, ner_tags):
-        if not is_subword[idx]:
-            predicted_label = iob_to_label(prediction)
-            content = image.crop(box).copy().convert("RGB")
-            content_list.append((predicted_label, tag, content))
-    return content_list
-
-
-def process_image(image, ner_tags):
+def process_image(image):
     width, height = image.size
 
     # encode
@@ -92,18 +80,18 @@ def process_image(image, ner_tags):
     # draw predictions over the image
     draw = ImageDraw.Draw(image)
     font = ImageFont.load_default()
-    for prediction, box, ner_tag in zip(true_predictions, true_boxes, ner_tags):
+    for idx, (prediction, box) in enumerate(zip(true_predictions, true_boxes)):
         predicted_label = iob_to_label(prediction)
         draw.rectangle(box, outline=label2color[predicted_label])
-        draw.text((box[0]+10, box[1]+10), text=ner_tag, fill='black', font=font)
-
+        draw.text((box[0]+10, box[1]-10), text=dataset['ner_tags'][idx], fill='black', font=font)
+
     return image
 
 
 title = "Invoice Information extraction using LayoutLMv3 model"
 description = "Invoice Information Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."
 
-article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
+article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
 
 examples =[['example1.png'],['example2.png'],['example3.png']]
 
@@ -117,6 +105,7 @@ iface = gr.Interface(fn=process_image,
                      article=article,
                      examples=examples,
                      css=css,
-                     analytics_enabled = True, enable_queue=True)
+                     analytics_enabled=True,
+                     enable_queue=True)
 
 iface.launch(inline=False, share=False, debug=False)
```
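
The `unnormalize_box` helper in the second hunk maps the 0-1000 normalized bounding boxes that LayoutLMv3 works with back to pixel coordinates. A minimal round-trip sketch; the `normalize_box` counterpart and the sample values are illustrative, not part of the committed file:

```python
# Round trip for the 0-1000 box convention that unnormalize_box reverses.
# normalize_box and the sample values are illustrative assumptions.
def normalize_box(bbox, width, height):
    return [
        int(1000 * bbox[0] / width),
        int(1000 * bbox[1] / height),
        int(1000 * bbox[2] / width),
        int(1000 * bbox[3] / height),
    ]

def unnormalize_box(bbox, width, height):
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]

width, height = 800, 1200                         # page size in pixels
pixel_box = [80, 120, 400, 180]                   # x0, y0, x1, y1
norm_box = normalize_box(pixel_box, width, height)
print(norm_box)                                   # [100, 100, 500, 150]
print(unnormalize_box(norm_box, width, height))   # [80.0, 120.0, 400.0, 180.0]
```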
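
Between the second and third hunks the diff elides the body of `process_image` (old lines 77-91), i.e. the encode and forward-pass section. Below is a hypothetical sketch of what that region usually contains in LayoutLMv3 inference demos such as the referenced NielsRogge tutorial; the names `true_predictions`, `true_boxes`, and `is_subword` come from the surrounding committed code, but the block itself is an assumption, not the commit's content:

```python
import numpy as np
import torch

# encode: with apply_ocr=True the processor runs OCR itself and returns
# input_ids, attention_mask, pixel_values and token-level bboxes
encoding = processor(image, truncation=True,
                     return_offsets_mapping=True, return_tensors="pt")
offset_mapping = encoding.pop("offset_mapping")

# forward pass, then highest-scoring label id per token
with torch.no_grad():
    outputs = model(**encoding)
predictions = outputs.logits.argmax(-1).squeeze().tolist()
token_boxes = encoding.bbox.squeeze().tolist()

# keep only the first subword of each word and rescale boxes to pixels
is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
true_predictions = [id2label[p] for i, p in enumerate(predictions)
                    if not is_subword[i]]
true_boxes = [unnormalize_box(b, width, height)
              for i, b in enumerate(token_boxes) if not is_subword[i]]
```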
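
The rewritten drawing loop annotates each box with `dataset['ner_tags'][idx]`, a tag looked up from the example dataset by token index. A common variant for arbitrary uploads is to label each box with the model's own prediction instead; a sketch under that assumption, not part of this commit:

```python
# Label each detected box with the predicted entity rather than a tag
# from the example dataset; true_predictions/true_boxes, draw, font and
# label2color are the same objects used in the committed loop.
for prediction, box in zip(true_predictions, true_boxes):
    predicted_label = iob_to_label(prediction)
    draw.rectangle(box, outline=label2color[predicted_label])
    draw.text((box[0] + 10, box[1] - 10), text=predicted_label,
              fill=label2color[predicted_label], font=font)
```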