Dileep7729 committed on
Commit
92992b8
·
verified ·
1 Parent(s): ac925fe

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
4
+
5
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the processor and the token-classification model from the same checkpoint.
processor = LayoutLMv3Processor.from_pretrained("quadranttechnologies/Table_OCR")
model = LayoutLMv3ForTokenClassification.from_pretrained("quadranttechnologies/Table_OCR")

# Place the weights on the chosen device and switch to inference mode.
model.to(device)
model.eval()
11
+
12
def process_image(image):
    """Run the token-classification model on one image and return labeled tokens.

    Args:
        image: The image handed over by the Gradio input component (a PIL
            image, given ``type="pil"``), or ``None`` when nothing was
            uploaded.

    Returns:
        A list of dicts, one per non-special token, each with the keys
        ``"word"`` (the decoded token text), ``"bounding_box"`` (the token's
        entry from the processor's ``bbox`` output) and ``"label"`` (the
        predicted class name from ``model.config.id2label``). An empty list
        is returned when ``image`` is ``None``.
    """
    # Submitting the form without an upload yields None; return an empty
    # result instead of failing inside the processor.
    if image is None:
        return []

    # Only the image is passed in, so the text and boxes come from the
    # processor itself (presumably its built-in OCR -- confirm against the
    # checkpoint's processor config).
    encoding = processor(
        image,
        return_tensors="pt",
        truncation=True,
        padding="max_length",
        max_length=512,
    )

    # Move inputs to the same device as the model.
    encoding = {key: val.to(device) for key, val in encoding.items()}

    # Perform inference without tracking gradients.
    with torch.no_grad():
        outputs = model(**encoding)
    predictions = torch.argmax(outputs.logits, dim=-1)

    # Index the single batch entry explicitly; squeeze() would also collapse
    # any other size-1 axis.
    token_ids = encoding["input_ids"][0].tolist()
    bboxes = encoding["bbox"][0].tolist()
    labels = predictions[0].tolist()

    # Padding to max_length fills the sequence with pad tokens, and the
    # sequence is wrapped in start/end markers. These decode to non-empty
    # strings, so they must be filtered by ID rather than by the emptiness
    # check below.
    special_ids = set(processor.tokenizer.all_special_ids)

    structured_output = []
    for token_id, bbox, label in zip(token_ids, bboxes, labels):
        if token_id in special_ids:
            continue
        # Decode the token ID to text.
        word = processor.tokenizer.decode([token_id]).strip()
        if word:  # Avoid adding empty words
            structured_output.append({
                "word": word,
                "bounding_box": bbox,
                "label": model.config.id2label[label],  # label ID -> label name
            })

    return structured_output
42
+
43
# Gradio UI: one PIL image in, the structured prediction list out as JSON.
interface = gr.Interface(fn=process_image, inputs=gr.Image(type="pil"), outputs="json")

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch(share=False)
53
+
54
+
55
+