|
import gradio as gr |
|
from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification |
|
from PIL import Image |
|
import os |
|
# Startup diagnostic: `which` prints the resolved path of the Tesseract binary.
# The processor is later called with only an image (no words/boxes), so it
# presumably relies on Tesseract-backed OCR -- confirm against the processor
# configuration. A non-zero exit status means `which` found nothing, so say so
# here instead of letting the first request fail with a less obvious error.
if os.system("which tesseract") != 0:
    print("WARNING: 'tesseract' was not found on PATH; OCR on uploaded images may fail.")
|
|
|
# Hugging Face checkpoint used for both the processor and the model, so the
# two are always loaded from the same source.
_LAYOUTLMV3_CHECKPOINT = "microsoft/layoutlmv3-base"

# Processor: turns an input image into the tensors the model consumes.
processor = LayoutLMv3Processor.from_pretrained(_LAYOUTLMV3_CHECKPOINT)

# Token-classification model with a 5-way output per token.
# NOTE(review): this is a base checkpoint, so the classification head is
# presumably freshly initialised rather than fine-tuned -- confirm whether
# fine-tuned weights should be loaded before trusting the predicted labels.
model = LayoutLMv3ForTokenClassification.from_pretrained(
    _LAYOUTLMV3_CHECKPOINT,
    num_labels=5,
)
|
|
|
def extract_invoice_data(image_path):
    """Run the LayoutLMv3 token classifier on an invoice image.

    Args:
        image_path: Filesystem path of the image to analyse. ``None`` or an
            empty string (for example, the form was submitted without an
            upload) yields an empty result instead of raising.

    Returns:
        A dict with a single ``"Predictions"`` key holding a list with one
        integer class id per token, taken as the argmax over the last
        dimension of the model's logits.
    """
    # Guard first: nothing to open, so skip the model entirely.
    if not image_path:
        return {"Predictions": []}

    # Local import so this function does not depend on a top-of-file
    # ``import torch`` being present.
    import torch

    # The context manager closes the underlying file; ``convert`` returns an
    # independent RGB copy that stays usable afterwards.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    # truncation=True asks the tokenizer to cap the sequence at the model's
    # maximum length, so a text-dense page does not overflow the model input.
    encoding = processor(image, return_tensors="pt", truncation=True)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encoding)

    # A single image is passed, so take the first (only) batch row explicitly.
    # Unlike a bare squeeze(), this always yields a list, even for one token.
    predictions = outputs.logits.argmax(-1)[0].tolist()

    return {"Predictions": predictions}
|
|
|
|
|
# Upload widget. With type="filepath" the handler receives the uploaded image
# as a path on disk, which is what extract_invoice_data opens.
invoice_upload = gr.Image(type="filepath")

# Single-function UI: image in, JSON out. The positional arguments are the
# handler, the input component and the output component, in that order.
interface = gr.Interface(
    extract_invoice_data,
    invoice_upload,
    "json",
    title="Invoice Data Extraction",
)

# Start the web server as soon as this module is executed.
interface.launch()
|
|