Dileep7729 committed on
Commit
7fe9789
·
verified ·
1 Parent(s): 96f8896

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import gradio as gr
import pytesseract
import torch
from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
5
+
6
# Tesseract location used by a default Windows installation.
_WINDOWS_TESSERACT_PATH = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Only override pytesseract's executable when running on Windows AND the
# binary is really there. Unconditionally assigning a Windows path breaks OCR
# on every other platform; leaving the setting untouched keeps pytesseract's
# own default lookup of the `tesseract` command.
if os.name == "nt" and os.path.isfile(_WINDOWS_TESSERACT_PATH):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_PATH


# Load the model and processor
processor = LayoutLMv3Processor.from_pretrained("quadranttechnologies/Table_OCR")
model = LayoutLMv3ForTokenClassification.from_pretrained("quadranttechnologies/Table_OCR")
model.eval()  # inference only: switch off training-time behaviour such as dropout

# Prefer the GPU when one is available; inputs are moved to the same device
# inside process_image.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
17
+
18
def process_image(image):
    """Classify the tokens of an uploaded document image.

    The image is passed through the module-level ``processor`` and the
    resulting encoding is fed to the module-level ``model``.

    Args:
        image: The image supplied by the Gradio input component, or ``None``
            when nothing was uploaded.

    Returns:
        On success, a list of dicts with the keys ``"word"``,
        ``"bounding_box"`` and ``"label"``. Each entry corresponds to one
        tokenizer token (decoded individually), so a single printed word may
        span several entries. Special tokens (padding, sequence start/end)
        and tokens that decode to an empty string are omitted.
        On failure, a dict ``{"error": <message>}``.
    """
    if image is None:
        # Submitting the form without an upload; answer with a readable
        # message instead of whatever the processor would raise.
        return {"error": "No image provided."}

    try:
        # Preprocess the image using the processor
        encoding = processor(
            image,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512,
        )

        # Move inputs to the same device as the model
        encoding = {key: val.to(device) for key, val in encoding.items()}

        # Perform inference
        with torch.no_grad():
            outputs = model(**encoding)
        predictions = torch.argmax(outputs.logits, dim=-1)

        # Select the single batch element explicitly. squeeze() would also
        # collapse any other size-1 dimension and silently change the shape.
        token_ids = encoding["input_ids"][0].tolist()
        boxes = encoding["bbox"][0].tolist()
        label_ids = predictions[0].tolist()

        # Padding to max_length fills the sequence with special tokens that
        # decode to non-empty strings such as "<pad>"; filter them by id so
        # they do not show up as recognised words.
        tokenizer = processor.tokenizer
        special_ids = set(tokenizer.all_special_ids)
        id2label = model.config.id2label

        structured_output = []
        for token_id, box, label_id in zip(token_ids, boxes, label_ids):
            if token_id in special_ids:
                continue
            word = tokenizer.decode([token_id]).strip()
            if word:  # Avoid adding empty words
                structured_output.append({
                    "word": word,
                    "bounding_box": box,
                    "label": id2label[label_id],  # Convert label ID to label name
                })

        return structured_output

    except Exception as e:
        # UI boundary: report the failure in the JSON panel instead of
        # letting the request crash.
        return {"error": str(e)}
52
+
53
# Build the web UI: one image upload wired to process_image, with the
# function's return value rendered as JSON.
interface = gr.Interface(
    title="Table OCR",
    description="Upload an image (e.g., receipt or document) to extract structured information in JSON format.",
    fn=process_image,
    inputs=gr.Image(type="pil"),  # hand the callback a PIL image
    outputs="json",  # show the callback's result as a JSON structure
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch(share=True)
65
+
66
+
67
+
68
+