layoutlmv3_invoice

Runtime error

phamvi856 committed on Jun 22, 2023

Commit

391f127

1 Parent(s): d406dd3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -58,15 +58,17 @@ def process_image(image):
     input_ids = encoding.input_ids.to(device)
     attention_mask = encoding.attention_mask.to(device)
     bbox = encoding.bbox[0].tolist()
-    bbox = torch.tensor(bbox, dtype=torch.float32).unsqueeze(0).to(device)
     # Inference
-    outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask)
     predicted_labels = outputs.logits.argmax(dim=2).squeeze().tolist()
     # Extract content from boxes
     extracted_content = {}
-    for idx, box in enumerate(bbox):
         predicted_label = id2label[predicted_labels[idx]]
         box_width = np.array(box)[2] - np.array(box)[0]
         box_height = np.array(box)[3] - np.array(box)[1]
@@ -76,7 +78,7 @@ def process_image(image):
     # Draw predictions over the image
     draw = ImageDraw.Draw(image)
     font = ImageFont.load_default()
-    for prediction, box in zip(predicted_labels, bbox):
         predicted_label = iob_to_label(id2label[prediction])
         box_width = np.array(box)[2] - np.array(box)[0]
         box_height = np.array(box)[3] - np.array(box)[1]
@@ -108,3 +110,4 @@ iface = gr.Interface(fn=process_image,
 iface.launch(inline=False, share=False, debug=False)

     input_ids = encoding.input_ids.to(device)
     attention_mask = encoding.attention_mask.to(device)
     bbox = encoding.bbox[0].tolist()
+    bbox = torch.tensor(bbox, dtype=torch.long).unsqueeze(0).to(device)
     # Inference
+    with torch.no_grad():
+        outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask)
     predicted_labels = outputs.logits.argmax(dim=2).squeeze().tolist()
     # Extract content from boxes
     extracted_content = {}
+    for idx, box in enumerate(bbox[0]):
         predicted_label = id2label[predicted_labels[idx]]
         box_width = np.array(box)[2] - np.array(box)[0]
         box_height = np.array(box)[3] - np.array(box)[1]
     # Draw predictions over the image
     draw = ImageDraw.Draw(image)
     font = ImageFont.load_default()
+    for prediction, box in zip(predicted_labels, bbox[0]):
         predicted_label = iob_to_label(id2label[prediction])
         box_width = np.array(box)[2] - np.array(box)[0]
         box_height = np.array(box)[3] - np.array(box)[1]
 iface.launch(inline=False, share=False, debug=False)