Spaces:

quadranttechnologies
/

Receipt_Image_Analyzer

Sleeping

App Files Community

Dileep7729 committed on Jan 4

Commit

92992b8

verified ·

1 Parent(s): ac925fe

Upload app.py

Browse files

Files changed (1) hide show

app.py +55 -0

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import gradio as gr
+import torch
+from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
+# Load the model and processor
+processor = LayoutLMv3Processor.from_pretrained("quadranttechnologies/Table_OCR")
+model = LayoutLMv3ForTokenClassification.from_pretrained("quadranttechnologies/Table_OCR")
+model.eval()
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+def process_image(image):
+    # Preprocess the image using the processor
+    encoding = processor(image, return_tensors="pt", truncation=True, padding="max_length", max_length=512)
+    # Move inputs to the same device as the model
+    encoding = {key: val.to(device) for key, val in encoding.items()}
+    # Perform inference
+    with torch.no_grad():
+        outputs = model(**encoding)
+        predictions = torch.argmax(outputs.logits, dim=-1)
+    # Extract input IDs, bounding boxes, and predicted labels
+    words = encoding["input_ids"]
+    bboxes = encoding["bbox"]
+    labels = predictions.squeeze().tolist()
+    # Format output as JSON
+    structured_output = []
+    for word_id, bbox, label in zip(words.squeeze().tolist(), bboxes.squeeze().tolist(), labels):
+        # Decode the word ID to text
+        word = processor.tokenizer.decode([word_id]).strip()
+        if word:  # Avoid adding empty words
+            structured_output.append({
+                "word": word,
+                "bounding_box": bbox,
+                "label": model.config.id2label[label]  # Convert label ID to label name
+            })
+    return structured_output
+# Define the Gradio interface
+interface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil"),  # Accepts image input
+    outputs="json"  # Outputs JSON structure
+)
+# Launch the app
+if __name__ == "__main__":
+    interface.launch(share=False)