keldrenloy committed on
Commit acfcb3a · Parent: 8aa0e27

Update app.py

Files changed (1)
  1. app.py +8 -12
app.py CHANGED
@@ -80,7 +80,6 @@ def unnormalize_box(bbox, width, height):
     ]
 
 def predict(image):
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     model = LayoutLMv3ForTokenClassification.from_pretrained("keldrenloy/layoutlmv3cordfinetuned").to(device) #add your model directory here
     processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
     label_list,id2label,label2id, num_labels = convert_l2n_n2l(dataset)
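With the local definition removed, the `.to(device)` call kept by this hunk only resolves if `device` is defined at module scope elsewhere in app.py. A minimal sketch of that assumed top-of-file definition (it sits outside this diff's context, so this is a guess, not the file's actual code):

import torch

# assumed module-level device; not visible in this diff
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")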
@@ -89,7 +88,7 @@ def predict(image):
     encoding_inputs = processor(image,return_offsets_mapping=True, return_tensors="pt",truncation = True)
     offset_mapping = encoding_inputs.pop('offset_mapping')
     for k,v in encoding_inputs.items():
-        encoding_inputs[k] = v.to(device)
+        encoding_inputs[k] = v
 
     with torch.no_grad():
         outputs = model(**encoding_inputs)
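The new loop body copies each tensor back unchanged, which keeps the code shape but drops device placement. For comparison, the `BatchEncoding` returned by the processor also has a `.to()` method that moves every contained tensor in one call, so a hypothetical alternative to the whole loop (assuming the module-level `device` sketched above) would be:

# hypothetical one-liner replacing the per-key loop; effectively a no-op on CPU
encoding_inputs = encoding_inputs.to(device)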
@@ -101,14 +100,6 @@ def predict(image):
     true_predictions = [id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]]
     true_boxes = [unnormalize_box(box, width, height) for idx, box in enumerate(token_boxes) if not is_subword[idx]]
 
-    return true_boxes, true_predictions
-
-def text_extraction(image):
-    feature_extractor = LayoutLMv3FeatureExtractor()
-    encoding = feature_extractor(image, return_tensors="pt")
-    return encoding['words'][0]
-
-def image_render(image):
     draw = ImageDraw.Draw(image)
     font = ImageFont.load_default()
     true_boxes,true_predictions = predict(image)
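This hunk folds the old `image_render` body into `predict`: the early `return true_boxes, true_predictions` and both intervening definitions go, so drawing now happens inline after classification (note the surviving context line `true_boxes,true_predictions = predict(image)` now sits inside `predict` itself). The drawing loop, new lines 106-112, falls between hunks and is not shown; purely for orientation, a hypothetical PIL sketch of such a loop, with no call taken from app.py:

for box, label in zip(true_boxes, true_predictions):  # hypothetical loop
    draw.rectangle(box, outline="red", width=2)       # box = [x0, y0, x1, y1] in pixels
    draw.text((box[0] + 2, box[1] - 12), label, fill="red", font=font)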
@@ -122,10 +113,15 @@ def image_render(image):
     extracted_words = convert_results(words,true_predictions)
 
     return image,extracted_words
+
+def text_extraction(image):
+    feature_extractor = LayoutLMv3FeatureExtractor()
+    encoding = feature_extractor(image, return_tensors="pt")
+    return encoding['words'][0]
 
 css = """.output_image, .input_image {height: 600px !important}"""
 
-demo = gr.Interface(fn = image_render,
+demo = gr.Interface(fn = predict,
     inputs = gr.inputs.Image(type="pil"),
     outputs = [gr.outputs.Image(type="pil", label="annotated image"),'text'],
     css = css,
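With `fn = predict`, whatever `predict` returns must match the two declared outputs, a PIL image plus text, and the hunk above shows it ends with `return image,extracted_words`. A minimal local smoke test of that contract (the file name is hypothetical):

from PIL import Image

annotated, extracted_words = predict(Image.open("sample_receipt.png").convert("RGB"))
annotated.save("annotated.png")  # image with boxes/labels drawn in place
print(extracted_words)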
@@ -136,4 +132,4 @@ demo = gr.Interface(fn = image_render,
     flagging_dir = "flagged",
     analytics_enabled = True, enable_queue=True
     )
-demo.launch(inline=False, share=False, debug=False)
+demo.launch(debug=False)
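The slimmed-down launch call leans on Gradio's defaults: `share` is off by default, and `inline` only affects rendering inside notebooks, so running the Space as a script should behave as before.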
 