Dileep7729 committed on
Commit
af8d340
·
verified ·
1 Parent(s): f37d3f8

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
4
+ import pytesseract
5
+
6
import os

# Default install location of the Tesseract binary on Windows.
_WINDOWS_TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Hugging Face Hub repository that provides both the processor and the model.
_MODEL_ID = "quadranttechnologies/Table_OCR"

# Only override pytesseract's command when running on Windows and the binary
# really is at the default install location. Everywhere else, keep
# pytesseract's default so the executable is resolved through PATH; forcing
# the Windows path unconditionally makes OCR fail on Linux/macOS hosts.
if os.name == "nt" and os.path.isfile(_WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD

# Load the model and processor
processor = LayoutLMv3Processor.from_pretrained(_MODEL_ID)
model = LayoutLMv3ForTokenClassification.from_pretrained(_MODEL_ID)
model.eval()  # inference only: switch off training-mode layers such as dropout

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
15
+
16
def process_image(image):
    """Run the processor and token-classification model on one image.

    Args:
        image: The document image to analyse (a PIL image when called from
            the Gradio interface), or ``None`` when nothing was uploaded.

    Returns:
        On success, a list with one dict per non-special token, holding:
        ``"word"`` (the decoded token text), ``"bounding_box"`` (the token's
        entry from the processor's ``bbox`` output) and ``"label"`` (the
        predicted label name looked up in ``model.config.id2label``).
        On failure, a dict of the form ``{"error": <message>}``.
    """
    if image is None:
        return {"error": "No image provided."}

    try:
        # PIL images expose ``mode``; anything else is passed through
        # untouched. Non-RGB images (e.g. RGBA, grayscale) are converted
        # because the processor is presumably configured for 3-channel
        # input -- NOTE(review): confirm against the checkpoint's config.
        if getattr(image, "mode", "RGB") != "RGB":
            image = image.convert("RGB")

        # Preprocess the image using the processor
        encoding = processor(
            image,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512,
        )

        # Move inputs to the same device as the model
        encoding = {key: val.to(device) for key, val in encoding.items()}

        # Perform inference
        with torch.no_grad():
            outputs = model(**encoding)
        predictions = torch.argmax(outputs.logits, dim=-1)

        # A single image was encoded, so take batch entry 0 explicitly
        # instead of relying on squeeze() to drop the batch dimension.
        token_ids = encoding["input_ids"][0].tolist()
        bboxes = encoding["bbox"][0].tolist()
        label_ids = predictions[0].tolist()

        # Because the sequence is padded to max_length, most positions can be
        # padding. Special tokens decode to non-empty text (e.g. "<pad>"), so
        # they must be filtered by id rather than by the emptiness check.
        special_ids = set(processor.tokenizer.all_special_ids)
        id2label = model.config.id2label

        # Format output as JSON
        structured_output = []
        for token_id, bbox, label_id in zip(token_ids, bboxes, label_ids):
            if token_id in special_ids:
                continue
            # Decode the token ID to text
            word = processor.tokenizer.decode([token_id]).strip()
            if word:  # Avoid adding empty words
                structured_output.append({
                    "word": word,
                    "bounding_box": bbox,
                    "label": id2label[label_id],  # Convert label ID to label name
                })

        return structured_output

    except Exception as e:
        # UI boundary: report the failure in the JSON output instead of
        # raising.
        return {"error": str(e)}
50
+
51
# Input component: the uploaded picture is handed to the callback as a PIL image.
image_input = gr.Image(type="pil")

# Web UI wiring: one image in, the callback's result rendered as JSON out.
interface = gr.Interface(
    fn=process_image,
    inputs=image_input,
    outputs="json",
    title="Table OCR",
    description="Upload an image (e.g., receipt or document) to extract structured information in JSON format.",
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch(share=True)
63
+
64
+
65
+
66
+