|
import gradio as gr
|
|
from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
|
|
from PIL import Image
|
|
|
|
|
|
# Single source of truth for the Hugging Face Hub repository id; the
# processor and the model are both loaded from this same repository.
_MODEL_ID = "quadranttechnologies/Table_OCR"

# Both objects are created once at import time and reused by every call
# to `predict`.
processor = LayoutLMv3Processor.from_pretrained(_MODEL_ID)
model = LayoutLMv3ForTokenClassification.from_pretrained(_MODEL_ID)
|
|
|
|
def predict(image):
    """Classify the tokens of one document image and return the class ids.

    Args:
        image: The uploaded image. The Gradio input is configured with
            ``type="pil"``, so this is normally a ``PIL.Image.Image``.
            ``None`` (nothing uploaded) is accepted and yields an empty
            result instead of an error.

    Returns:
        A dict of the form ``{"results": [...]}`` whose list holds the
        arg-max class index for each position of the model's logits. The
        list is empty when no image was supplied.
    """
    # Gradio passes None when the user submits without uploading anything.
    if image is None:
        return {"results": []}

    # Imported locally because this file has no module-level torch import.
    import torch

    # Normalise grayscale / palette / RGBA PIL images to three channels
    # before preprocessing; non-PIL inputs are passed through untouched.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    # truncation=True keeps text-heavy pages within the tokenizer's
    # configured maximum length instead of overflowing the model.
    encoding = processor(images=image, return_tensors="pt", truncation=True)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**encoding)

    # Assumes logits are (batch=1, positions, classes) - TODO confirm.
    # Indexing the batch axis explicitly (rather than a bare squeeze())
    # guarantees a list even if only one position is present.
    predictions = outputs.logits.argmax(-1)[0].tolist()

    return {"results": predictions}
|
|
|
|
|
|
# Web UI definition: a single PIL image input is handed to `predict`, and
# whatever it returns is rendered with Gradio's "json" output component.
iface = gr.Interface(
    predict,
    gr.Image(type="pil"),
    "json",
    title="Table OCR",
    description="Upload a receipt or document image to extract structured information.",
)

# Start the Gradio app with its default launch settings.
iface.launch()
|
|
|