phamvi856 committed
Commit 042436f · Parent: 403dbbc

Update app.py

Files changed (1)
  1. app.py +15 -26
app.py CHANGED
```diff
@@ -21,13 +21,12 @@ from PIL import Image, ImageDraw, ImageFont
 processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=True)
 model = AutoModelForTokenClassification.from_pretrained("Theivaprakasham/layoutlmv3-finetuned-invoice")
 
-
-
 # load image example
 dataset = load_dataset("darentang/generated", split="test")
 Image.open(dataset[2]["image_path"]).convert("RGB").save("example1.png")
 Image.open(dataset[1]["image_path"]).convert("RGB").save("example2.png")
 Image.open(dataset[0]["image_path"]).convert("RGB").save("example3.png")
+
 # define id2label, label2color
 labels = dataset.features['ner_tags'].feature.names
 id2label = {v: k for v, k in enumerate(labels)}
@@ -44,33 +43,22 @@ label2color = {
     "B-TOTAL": 'blue',
     "I-BILLER_ADDRESS": 'blue',
     "O": 'orange'
-}
+}
 
 def unnormalize_box(bbox, width, height):
-    return [
-        width * (bbox[0] / 1000),
-        height * (bbox[1] / 1000),
-        width * (bbox[2] / 1000),
-        height * (bbox[3] / 1000),
-    ]
+    return [
+        width * (bbox[0] / 1000),
+        height * (bbox[1] / 1000),
+        width * (bbox[2] / 1000),
+        height * (bbox[3] / 1000),
+    ]
 
 
 def iob_to_label(label):
     return label
 
 
-def print_content(image, predictions, token_boxes, ner_tags):
-    width, height = image.size
-    content_list = []
-    for prediction, box, tag in zip(predictions, token_boxes, ner_tags):
-        if not is_subword[idx]:
-            predicted_label = iob_to_label(prediction)
-            content = image.crop(box).copy().convert("RGB")
-            content_list.append((predicted_label, tag, content))
-    return content_list
-
-
-def process_image(image, ner_tags):
+def process_image(image):
     width, height = image.size
 
     # encode
@@ -92,18 +80,18 @@ def process_image(image, ner_tags):
     # draw predictions over the image
     draw = ImageDraw.Draw(image)
     font = ImageFont.load_default()
-    for prediction, box, ner_tag in zip(true_predictions, true_boxes, ner_tags):
+    for idx, (prediction, box) in enumerate(zip(true_predictions, true_boxes)):
         predicted_label = iob_to_label(prediction)
         draw.rectangle(box, outline=label2color[predicted_label])
-        draw.text((box[0]+10, box[1]+10), text=ner_tag, fill='black', font=font)
-
+        draw.text((box[0]+10, box[1]-10), text=dataset['ner_tags'][idx], fill='black', font=font)
+
     return image
 
 
 title = "Invoice Information extraction using LayoutLMv3 model"
 description = "Invoice Information Extraction - We use Microsoft's LayoutLMv3 trained on Invoice Dataset to predict the Biller Name, Biller Address, Biller post_code, Due_date, GST, Invoice_date, Invoice_number, Subtotal and Total. To use it, simply upload an image or use the example image below. Results will show up in a few seconds."
 
-article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
+article="<b>References</b><br>[1] Y. Xu et al., “LayoutLMv3: Pre-training for Document AI with Unified Text and Image Masking.” 2022. <a href='https://arxiv.org/abs/2204.08387'>Paper Link</a><br>[2] <a href='https://github.com/NielsRogge/Transformers-Tutorials/tree/master/LayoutLMv3'>LayoutLMv3 training and inference</a>"
 
 examples =[['example1.png'],['example2.png'],['example3.png']]
 
@@ -117,6 +105,7 @@ iface = gr.Interface(fn=process_image,
                      article=article,
                      examples=examples,
                      css=css,
-                     analytics_enabled = True, enable_queue=True)
+                     analytics_enabled=True,
+                     enable_queue=True)
 
 iface.launch(inline=False, share=False, debug=False)
```
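
The `unnormalize_box` helper in the second hunk maps the 0-1000 normalized bounding boxes that LayoutLMv3 works with back to pixel coordinates. A minimal round-trip sketch; the `normalize_box` counterpart and the sample values are illustrative, not part of the committed file:

```python
# Round trip for the 0-1000 box convention that unnormalize_box reverses.
# normalize_box and the sample values are illustrative assumptions.
def normalize_box(bbox, width, height):
    return [
        int(1000 * bbox[0] / width),
        int(1000 * bbox[1] / height),
        int(1000 * bbox[2] / width),
        int(1000 * bbox[3] / height),
    ]

def unnormalize_box(bbox, width, height):
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]

width, height = 800, 1200                         # page size in pixels
pixel_box = [80, 120, 400, 180]                   # x0, y0, x1, y1
norm_box = normalize_box(pixel_box, width, height)
print(norm_box)                                   # [100, 100, 500, 150]
print(unnormalize_box(norm_box, width, height))   # [80.0, 120.0, 400.0, 180.0]
```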
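
Between the second and third hunks the diff elides the body of `process_image` (old lines 77-91), i.e. the encode and forward-pass section. Below is a hypothetical sketch of what that region usually contains in LayoutLMv3 inference demos such as the referenced NielsRogge tutorial; the names `true_predictions`, `true_boxes`, and `is_subword` come from the surrounding committed code, but the block itself is an assumption, not the commit's content:

```python
import numpy as np
import torch

# encode: with apply_ocr=True the processor runs OCR itself and returns
# input_ids, attention_mask, pixel_values and token-level bboxes
encoding = processor(image, truncation=True,
                     return_offsets_mapping=True, return_tensors="pt")
offset_mapping = encoding.pop("offset_mapping")

# forward pass, then highest-scoring label id per token
with torch.no_grad():
    outputs = model(**encoding)
predictions = outputs.logits.argmax(-1).squeeze().tolist()
token_boxes = encoding.bbox.squeeze().tolist()

# keep only the first subword of each word and rescale boxes to pixels
is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
true_predictions = [id2label[p] for i, p in enumerate(predictions)
                    if not is_subword[i]]
true_boxes = [unnormalize_box(b, width, height)
              for i, b in enumerate(token_boxes) if not is_subword[i]]
```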
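
The rewritten drawing loop annotates each box with `dataset['ner_tags'][idx]`, a tag looked up from the example dataset by token index. A common variant for arbitrary uploads is to label each box with the model's own prediction instead; a sketch under that assumption, not part of this commit:

```python
# Label each detected box with the predicted entity rather than a tag
# from the example dataset; true_predictions/true_boxes, draw, font and
# label2color are the same objects used in the committed loop.
for prediction, box in zip(true_predictions, true_boxes):
    predicted_label = iob_to_label(prediction)
    draw.rectangle(box, outline=label2color[predicted_label])
    draw.text((box[0] + 10, box[1] - 10), text=predicted_label,
              fill=label2color[predicted_label], font=font)
```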