Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -21,13 +21,12 @@ from PIL import Image, ImageDraw, ImageFont
|
|
21 |
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
|
22 |
model = AutoModelForTokenClassification.from_pretrained("Theivaprakasham/layoutlmv3-finetuned-invoice")
|
23 |
|
24 |
-
|
25 |
-
|
26 |
# load image example
|
27 |
dataset = load_dataset("darentang/generated", split="test")
|
28 |
Image.open(dataset[2]["image_path"]).convert("RGB").save("example1.png")
|
29 |
Image.open(dataset[1]["image_path"]).convert("RGB").save("example2.png")
|
30 |
Image.open(dataset[0]["image_path"]).convert("RGB").save("example3.png")
|
|
|
31 |
# define id2label, label2color
|
32 |
labels = dataset.features['ner_tags'].feature.names
|
33 |
id2label = {v: k for v, k in enumerate(labels)}
|
@@ -44,33 +43,22 @@ label2color = {
|
|
44 |
"B-TOTAL": 'blue',
|
45 |
"I-BILLER_ADDRESS": 'blue',
|
46 |
"O": 'orange'
|
47 |
-
|
48 |
|
49 |
def unnormalize_box(bbox, width, height):
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
|
57 |
|
58 |
def iob_to_label(label):
|
59 |
return label
|
60 |
|
61 |
|
62 |
-
def
|
63 |
-
width, height = image.size
|
64 |
-
content_list = []
|
65 |
-
for prediction, box, tag in zip(predictions, token_boxes, ner_tags):
|
66 |
-
if not is_subword[idx]:
|
67 |
-
predicted_label = iob_to_label(prediction)
|
68 |
-
content = image.crop(box).copy().convert("RGB")
|
69 |
-
content_list.append((predicted_label, tag, content))
|
70 |
-
return content_list
|
71 |
-
|
72 |
-
|
73 |
-
def process_image(image, ner_tags):
|
74 |
width, height = image.size
|
75 |
|
76 |
# encode
|
@@ -92,18 +80,18 @@ def process_image(image, ner_tags):
|
|
92 |
# draw predictions over the image
|
93 |
draw = ImageDraw.Draw(image)
|
94 |
font = ImageFont.load_default()
|
95 |
-
for prediction, box
|
96 |
predicted_label = iob_to_label(prediction)
|
97 |
draw.rectangle(box, outline=label2color[predicted_label])
|
98 |
-
draw.text((box[0]+10, box[1]
|
99 |
-
|
100 |
return image
|
101 |
|
102 |
|
103 |
title = "Invoice Information extraction using LayoutLMv3 model"
|
104 |
description = "Invoice Information Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."
|
105 |
|
106 |
-
article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
|
107 |
|
108 |
examples =[['example1.png'],['example2.png'],['example3.png']]
|
109 |
|
@@ -117,6 +105,7 @@ iface = gr.Interface(fn=process_image,
|
|
117 |
article=article,
|
118 |
examples=examples,
|
119 |
css=css,
|
120 |
-
analytics_enabled
|
|
|
121 |
|
122 |
iface.launch(inline=False, share=False, debug=False)
|
|
|
21 |
# Processor is created with OCR enabled; the classifier weights come from the
# invoice fine-tuned LayoutLMv3 checkpoint.
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
model = AutoModelForTokenClassification.from_pretrained("Theivaprakasham/layoutlmv3-finetuned-invoice")

# load image example
dataset = load_dataset("darentang/generated", split="test")
# Test-split rows 2, 1 and 0 are written out as example1..example3, in that order.
for _row, _filename in ((2, "example1.png"), (1, "example2.png"), (0, "example3.png")):
    _rgb_image = Image.open(dataset[_row]["image_path"]).convert("RGB")
    _rgb_image.save(_filename)

# define id2label, label2color
labels = dataset.features['ner_tags'].feature.names
# Maps each class index to its label name.
id2label = dict(enumerate(labels))
|
|
|
43 |
"B-TOTAL": 'blue',
|
44 |
"I-BILLER_ADDRESS": 'blue',
|
45 |
"O": 'orange'
|
46 |
+
}
|
47 |
|
48 |
def unnormalize_box(bbox, width, height):
    """Rescale a box whose coordinates are expressed in thousandths.

    Items 0 and 2 of ``bbox`` are divided by 1000 and multiplied by
    ``width``; items 1 and 3 are divided by 1000 and multiplied by
    ``height``. The result is returned as a new four-item list.
    """
    scale = 1000
    x0 = width * (bbox[0] / scale)
    y0 = height * (bbox[1] / scale)
    x1 = width * (bbox[2] / scale)
    y1 = height * (bbox[3] / scale)
    return [x0, y0, x1, y1]
|
55 |
|
56 |
|
57 |
def iob_to_label(label):
    """Return ``label`` exactly as given.

    No prefix is stripped: the ``label2color`` keys visible in this file
    (for example "B-TOTAL" and "I-BILLER_ADDRESS") keep their B-/I- prefix.
    """
    unchanged = label
    return unchanged
|
59 |
|
60 |
|
61 |
+
def process_image(image):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
width, height = image.size
|
63 |
|
64 |
# encode
|
|
|
80 |
# draw predictions over the image
|
81 |
draw = ImageDraw.Draw(image)
|
82 |
font = ImageFont.load_default()
|
83 |
+
# Outline every predicted box and write its predicted label next to it.
for prediction, box in zip(true_predictions, true_boxes):
    predicted_label = iob_to_label(prediction)
    draw.rectangle(box, outline=label2color[predicted_label])
    # The caption must be the label predicted for this box. Looking up
    # dataset['ner_tags'][idx] instead would index rows of the example
    # dataset by the loop counter: unrelated to the uploaded image, not a
    # string, and out of range once idx exceeds the dataset length.
    draw.text((box[0]+10, box[1]-10), text=predicted_label, fill='black', font=font)
|
87 |
+
|
88 |
return image
|
89 |
|
90 |
|
91 |
title = "Invoice Information extraction using LayoutLMv3 model"
|
92 |
description = "Invoice Information Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."
|
93 |
|
94 |
+
article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
|
95 |
|
96 |
examples =[['example1.png'],['example2.png'],['example3.png']]
|
97 |
|
|
|
105 |
article=article,
|
106 |
examples=examples,
|
107 |
css=css,
|
108 |
+
analytics_enabled=True,
|
109 |
+
enable_queue=True)
|
110 |
|
111 |
iface.launch(inline=False, share=False, debug=False)
|